diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 06025c730700f..92f3b3ce83297 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -94,8 +94,6 @@ repos:
         stages: [manual]
         additional_dependencies: &pyright_dependencies
         - pyright@1.1.258
--   repo: local
-    hooks:
     -   id: pyright_reportGeneralTypeIssues
         name: pyright reportGeneralTypeIssues
         entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json
@@ -105,8 +103,6 @@ repos:
         types: [python]
         stages: [manual]
         additional_dependencies: *pyright_dependencies
--   repo: local
-    hooks:
     -   id: mypy
         name: mypy
         entry: mypy
@@ -115,8 +111,6 @@ repos:
         pass_filenames: false
         types: [python]
         stages: [manual]
--   repo: local
-    hooks:
     -   id: flake8-rst
         name: flake8-rst
         description: Run flake8 on code snippets in docstrings or RST files
@@ -237,3 +231,15 @@ repos:
         additional_dependencies:
         - flake8==4.0.1
         - flake8-pyi==22.5.1
+    -   id: future-annotations
+        name: import annotations from __future__
+        entry: 'from __future__ import annotations'
+        language: pygrep
+        args: [--negate]
+        files: ^pandas/
+        types: [python]
+        exclude: |
+            (?x)
+            /(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$
+            |/tests/
+            |/_testing/
diff --git a/pandas/_config/dates.py b/pandas/_config/dates.py
index 5bf2b49ce5904..b37831f96eb73 100644
--- a/pandas/_config/dates.py
+++ b/pandas/_config/dates.py
@@ -1,6 +1,8 @@
 """
 config for datetime formatting
 """
+from __future__ import annotations
+
 from pandas._config import config as cf
 
 pc_date_dayfirst_doc = """
diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py
index 9af7962fe4ad0..5bec8e5fa1913 100644
--- a/pandas/compat/chainmap.py
+++ b/pandas/compat/chainmap.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from typing import (
     ChainMap,
     TypeVar,
diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
index eef2bb6639c36..833cda20368a2 100644
--- a/pandas/compat/pyarrow.py
+++ b/pandas/compat/pyarrow.py
@@ -1,5 +1,7 @@
 """ support pyarrow compatibility across versions """
 
+from __future__ import annotations
+
 from pandas.util.version import Version
 
 try:
diff --git a/pandas/core/_numba/kernels/shared.py b/pandas/core/_numba/kernels/shared.py
index ec25e78a8d897..6e6bcef590d06 100644
--- a/pandas/core/_numba/kernels/shared.py
+++ b/pandas/core/_numba/kernels/shared.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import numba
 import numpy as np
 
diff --git a/pandas/core/array_algos/transforms.py b/pandas/core/array_algos/transforms.py
index 27aebb9911e83..93b029c21760e 100644
--- a/pandas/core/array_algos/transforms.py
+++ b/pandas/core/array_algos/transforms.py
@@ -2,6 +2,8 @@
 transforms.py is for shape-preserving functions.
 """
 
+from __future__ import annotations
+
 import numpy as np
 
 
diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py
index d2875be0f58cd..280a599de84ed 100644
--- a/pandas/core/arraylike.py
+++ b/pandas/core/arraylike.py
@@ -4,6 +4,8 @@
     Index
     ExtensionArray
 """
+from __future__ import annotations
+
 import operator
 from typing import Any
 import warnings
diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py
index 7be617de63a40..3221b158241f5 100644
--- a/pandas/core/computation/check.py
+++ b/pandas/core/computation/check.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from pandas.compat._optional import import_optional_dependency
 
 ne = import_optional_dependency("numexpr", errors="warn")
diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py
index ebf4d4ea9154e..a1ac3dfa06ee0 100644
--- a/pandas/core/computation/common.py
+++ b/pandas/core/computation/common.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from functools import reduce
 
 import numpy as np
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index a49e35539656f..8c1a3fece255e 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -9,6 +9,8 @@
 module is imported, register them here rather than in the module.
 
 """
+from __future__ import annotations
+
 import os
 from typing import Callable
 import warnings
diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index f47aeb16e19f1..893e4a9be58ef 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -1,5 +1,7 @@
 """ basic inference routines """
 
+from __future__ import annotations
+
 from collections import abc
 from numbers import Number
 import re
diff --git a/pandas/core/exchange/buffer.py b/pandas/core/exchange/buffer.py
index 098c596bff4cd..a3b05a0c5d24a 100644
--- a/pandas/core/exchange/buffer.py
+++ b/pandas/core/exchange/buffer.py
@@ -1,7 +1,4 @@
-from typing import (
-    Optional,
-    Tuple,
-)
+from __future__ import annotations
 
 import numpy as np
 from packaging import version
@@ -60,7 +57,7 @@ def __dlpack__(self):
             return self._x.__dlpack__()
         raise NotImplementedError("__dlpack__")
 
-    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
+    def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]:
         """
         Device type and device ID for where the data in the buffer resides.
         """
diff --git a/pandas/core/exchange/dataframe_protocol.py b/pandas/core/exchange/dataframe_protocol.py
index ee2ae609e73f9..367b906332741 100644
--- a/pandas/core/exchange/dataframe_protocol.py
+++ b/pandas/core/exchange/dataframe_protocol.py
@@ -2,6 +2,8 @@
 A verbatim copy (vendored) of the spec from
 https://github.com/data-apis/dataframe-api
 """
+from __future__ import annotations
+
 from abc import (
     ABC,
     abstractmethod,
@@ -9,11 +11,8 @@
 import enum
 from typing import (
     Any,
-    Dict,
     Iterable,
-    Optional,
     Sequence,
-    Tuple,
     TypedDict,
 )
 
@@ -90,18 +89,18 @@ class ColumnNullType(enum.IntEnum):
 class ColumnBuffers(TypedDict):
     # first element is a buffer containing the column data;
     # second element is the data buffer's associated dtype
-    data: Tuple["Buffer", Any]
+    data: tuple[Buffer, Any]
 
     # first element is a buffer containing mask values indicating missing data;
     # second element is the mask value buffer's associated dtype.
     # None if the null representation is not a bit or byte mask
-    validity: Optional[Tuple["Buffer", Any]]
+    validity: tuple[Buffer, Any] | None
 
     # first element is a buffer containing the offset values for
     # variable-size binary data (e.g., variable-length strings);
     # second element is the offsets buffer's associated dtype.
     # None if the data buffer does not have an associated offsets buffer
-    offsets: Optional[Tuple["Buffer", Any]]
+    offsets: tuple[Buffer, Any] | None
 
 
 class CategoricalDescription(TypedDict):
@@ -111,7 +110,7 @@ class CategoricalDescription(TypedDict):
     is_dictionary: bool
     # Python-level only (e.g. ``{int: str}``).
     # None if not a dictionary-style categorical.
-    mapping: Optional[dict]
+    mapping: dict | None
 
 
 class Buffer(ABC):
@@ -161,7 +160,7 @@ def __dlpack__(self):
         raise NotImplementedError("__dlpack__")
 
     @abstractmethod
-    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
+    def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]:
         """
         Device type and device ID for where the data in the buffer resides.
         Uses device type codes matching DLPack.
@@ -239,7 +238,7 @@ def offset(self) -> int:
 
     @property
     @abstractmethod
-    def dtype(self) -> Tuple[DtypeKind, int, str, str]:
+    def dtype(self) -> tuple[DtypeKind, int, str, str]:
         """
         Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.
 
@@ -293,7 +292,7 @@ def describe_categorical(self) -> CategoricalDescription:
 
     @property
     @abstractmethod
-    def describe_null(self) -> Tuple[ColumnNullType, Any]:
+    def describe_null(self) -> tuple[ColumnNullType, Any]:
         """
         Return the missing value (or "null") representation the column dtype
         uses, as a tuple ``(kind, value)``.
@@ -306,7 +305,7 @@ def describe_null(self) -> Tuple[ColumnNullType, Any]:
 
     @property
     @abstractmethod
-    def null_count(self) -> Optional[int]:
+    def null_count(self) -> int | None:
         """
         Number of null elements, if known.
 
@@ -316,7 +315,7 @@ def null_count(self) -> Optional[int]:
 
     @property
     @abstractmethod
-    def metadata(self) -> Dict[str, Any]:
+    def metadata(self) -> dict[str, Any]:
         """
         The metadata for the column. See `DataFrame.metadata` for more details.
         """
@@ -330,7 +329,7 @@ def num_chunks(self) -> int:
         pass
 
     @abstractmethod
-    def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable["Column"]:
+    def get_chunks(self, n_chunks: int | None = None) -> Iterable[Column]:
         """
         Return an iterator yielding the chunks.
 
@@ -395,7 +394,7 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):
 
     @property
     @abstractmethod
-    def metadata(self) -> Dict[str, Any]:
+    def metadata(self) -> dict[str, Any]:
         """
         The metadata for the data frame, as a dictionary with string keys. The
         contents of `metadata` may be anything, they are meant for a library
@@ -415,7 +414,7 @@ def num_columns(self) -> int:
         pass
 
     @abstractmethod
-    def num_rows(self) -> Optional[int]:
+    def num_rows(self) -> int | None:
         # TODO: not happy with Optional, but need to flag it may be expensive
         # why include it if it may be None - what do we expect consumers
         # to do here?
@@ -460,21 +459,21 @@ def get_columns(self) -> Iterable[Column]:
         pass
 
     @abstractmethod
-    def select_columns(self, indices: Sequence[int]) -> "DataFrame":
+    def select_columns(self, indices: Sequence[int]) -> DataFrame:
         """
         Create a new DataFrame by selecting a subset of columns by index.
         """
         pass
 
     @abstractmethod
-    def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame":
+    def select_columns_by_name(self, names: Sequence[str]) -> DataFrame:
         """
         Create a new DataFrame by selecting a subset of columns by name.
         """
         pass
 
     @abstractmethod
-    def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable["DataFrame"]:
+    def get_chunks(self, n_chunks: int | None = None) -> Iterable[DataFrame]:
         """
         Return an iterator yielding the chunks.
 
diff --git a/pandas/core/exchange/from_dataframe.py b/pandas/core/exchange/from_dataframe.py
index cb1967b5701a0..a33e47ba3b68e 100644
--- a/pandas/core/exchange/from_dataframe.py
+++ b/pandas/core/exchange/from_dataframe.py
@@ -1,13 +1,8 @@
+from __future__ import annotations
+
 import ctypes
 import re
-from typing import (
-    Any,
-    Dict,
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import Any
 
 import numpy as np
 
@@ -24,7 +19,7 @@
     Endianness,
 )
 
-_NP_DTYPES: Dict[DtypeKind, Dict[int, Any]] = {
+_NP_DTYPES: dict[DtypeKind, dict[int, Any]] = {
     DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},
     DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64},
     DtypeKind.FLOAT: {32: np.float32, 64: np.float64},
@@ -108,7 +103,7 @@ def protocol_df_chunk_to_pandas(df: DataFrameXchg) -> pd.DataFrame:
     """
     # We need a dict of columns here, with each column being a NumPy array (at
    # least for now, deal with non-NumPy dtypes later).
-    columns: Dict[str, Any] = {}
+    columns: dict[str, Any] = {}
     buffers = []  # hold on to buffers, keeps memory alive
     for name in df.column_names():
         if not isinstance(name, str):
@@ -140,7 +135,7 @@ def protocol_df_chunk_to_pandas(df: DataFrameXchg) -> pd.DataFrame:
     return pandas_df
 
 
-def primitive_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
+def primitive_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
     """
     Convert a column holding one of the primitive dtypes to a NumPy array.
 
@@ -165,7 +160,7 @@ def primitive_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
     return data, buffers
 
 
-def categorical_column_to_series(col: Column) -> Tuple[pd.Series, Any]:
+def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]:
     """
     Convert a column holding categorical data to a pandas Series.
 
@@ -205,7 +200,7 @@ def categorical_column_to_series(col: Column) -> Tuple[pd.Series, Any]:
     return data, buffers
 
 
-def string_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
+def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
     """
     Convert a column holding string data to a NumPy array.
 
@@ -268,7 +263,7 @@ def string_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
         null_pos = ~null_pos
 
     # Assemble the strings from the code units
-    str_list: List[Union[None, float, str]] = [None] * col.size
+    str_list: list[None | float | str] = [None] * col.size
     for i in range(col.size):
         # Check for missing values
         if null_pos is not None and null_pos[i]:
@@ -324,7 +319,7 @@ def parse_datetime_format_str(format_str, data):
     raise NotImplementedError(f"DateTime kind is not supported: {format_str}")
 
 
-def datetime_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
+def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
     """
     Convert a column holding DateTime data to a NumPy array.
 
@@ -362,9 +357,9 @@ def datetime_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
 
 def buffer_to_ndarray(
     buffer: Buffer,
-    dtype: Tuple[DtypeKind, int, str, str],
+    dtype: tuple[DtypeKind, int, str, str],
     offset: int = 0,
-    length: Optional[int] = None,
+    length: int | None = None,
 ) -> np.ndarray:
     """
     Build a NumPy array from the passed buffer.
@@ -470,9 +465,9 @@ def bitmask_to_bool_ndarray(
 
 
 def set_nulls(
-    data: Union[np.ndarray, pd.Series],
+    data: np.ndarray | pd.Series,
     col: Column,
-    validity: Optional[Tuple[Buffer, Tuple[DtypeKind, int, str, str]]],
+    validity: tuple[Buffer, tuple[DtypeKind, int, str, str]] | None,
     allow_modify_inplace: bool = True,
 ):
     """
diff --git a/pandas/core/exchange/utils.py b/pandas/core/exchange/utils.py
index 0c746113babee..2cc5126591718 100644
--- a/pandas/core/exchange/utils.py
+++ b/pandas/core/exchange/utils.py
@@ -2,6 +2,8 @@
 Utility functions and objects for implementing the exchange API.
 """
 
+from __future__ import annotations
+
 import re
 import typing
 
diff --git a/pandas/core/flags.py b/pandas/core/flags.py
index b4e1039e216c0..f07c6917d91e5 100644
--- a/pandas/core/flags.py
+++ b/pandas/core/flags.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import weakref
 
 
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index 2caaadbc05cff..6a1c586d90b6e 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -2,6 +2,8 @@
 Functions for arithmetic and comparison operations on NumPy arrays and
 ExtensionArrays.
 """
+from __future__ import annotations
+
 import datetime
 from functools import partial
 import operator
diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py
index b883fe7751daa..f0e6aa3750cee 100644
--- a/pandas/core/ops/common.py
+++ b/pandas/core/ops/common.py
@@ -1,6 +1,8 @@
 """
 Boilerplate functions used in defining binary operations.
 """
+from __future__ import annotations
+
 from functools import wraps
 from typing import Callable
 
diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py
index bfd4afe0de86f..2f500703ccfb3 100644
--- a/pandas/core/ops/dispatch.py
+++ b/pandas/core/ops/dispatch.py
@@ -1,6 +1,8 @@
 """
 Functions for defining unary operations.
 """
+from __future__ import annotations
+
 from typing import Any
 
 from pandas._typing import ArrayLike
diff --git a/pandas/core/ops/invalid.py b/pandas/core/ops/invalid.py
index e069c765d5299..eb27cf7450119 100644
--- a/pandas/core/ops/invalid.py
+++ b/pandas/core/ops/invalid.py
@@ -1,6 +1,8 @@
 """
 Templates for invalid operations.
 """
+from __future__ import annotations
+
 import operator
 
 import numpy as np
diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py
index d1f704635ba64..e8a930083a778 100644
--- a/pandas/core/ops/methods.py
+++ b/pandas/core/ops/methods.py
@@ -1,6 +1,8 @@
 """
 Functions to generate methods and pin them to the appropriate classes.
 """
+from __future__ import annotations
+
 import operator
 
 from pandas.core.dtypes.generic import (
diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py
index 8d5f7fb8de758..850ca44e996c4 100644
--- a/pandas/core/ops/missing.py
+++ b/pandas/core/ops/missing.py
@@ -21,6 +21,8 @@
 3) divmod behavior consistent with 1) and 2).
 
 """
+from __future__ import annotations
+
 import operator
 
 import numpy as np
diff --git a/pandas/core/roperator.py b/pandas/core/roperator.py
index 15b16b6fa976a..2f320f4e9c6b9 100644
--- a/pandas/core/roperator.py
+++ b/pandas/core/roperator.py
@@ -2,6 +2,8 @@
 Reversed Operations not available in the stdlib operator module.
 Defining these instead of using lambdas allows us to reference them by name.
 """
+from __future__ import annotations
+
 import operator
 
 
diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
index 15144116fa924..ed2a4002f5ce7 100644
--- a/pandas/core/window/common.py
+++ b/pandas/core/window/common.py
@@ -1,4 +1,6 @@
 """Common utility functions for rolling operations"""
+from __future__ import annotations
+
 from collections import defaultdict
 from typing import cast
 
diff --git a/pandas/core/window/doc.py b/pandas/core/window/doc.py
index 61cfa29ffc481..4fe08e2fa20b3 100644
--- a/pandas/core/window/doc.py
+++ b/pandas/core/window/doc.py
@@ -1,4 +1,6 @@
 """Any shareable docstring components for rolling/expanding/ewm"""
+from __future__ import annotations
+
 from textwrap import dedent
 
 from pandas.core.shared_docs import _shared_docs
diff --git a/pandas/core/window/online.py b/pandas/core/window/online.py
index bb973f05687e2..2e25bdd12d3e0 100644
--- a/pandas/core/window/online.py
+++ b/pandas/core/window/online.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 
 import numpy as np
diff --git a/pandas/io/formats/_color_data.py b/pandas/io/formats/_color_data.py
index e5b72b2befa4f..2e7cb7f29646e 100644
--- a/pandas/io/formats/_color_data.py
+++ b/pandas/io/formats/_color_data.py
@@ -3,6 +3,8 @@
 # This data has been copied here, instead of being imported from matplotlib,
 # not to have ``to_excel`` methods require matplotlib.
 # source: matplotlib._color_data (3.3.3)
+from __future__ import annotations
+
 CSS4_COLORS = {
     "aliceblue": "F0F8FF",
     "antiquewhite": "FAEBD7",
diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py
index 979b2cacbf706..366e6924a1e16 100644
--- a/pandas/io/sas/sas_constants.py
+++ b/pandas/io/sas/sas_constants.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 magic = (
     b"\x00\x00\x00\x00\x00\x00\x00\x00"
     + b"\x00\x00\x00\x00\xc2\xea\x81\x60"
diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py
index c731c40f10a05..6015662999a7d 100644
--- a/pandas/plotting/_matplotlib/compat.py
+++ b/pandas/plotting/_matplotlib/compat.py
@@ -1,4 +1,6 @@
 # being a bit too dynamic
+from __future__ import annotations
+
 import operator
 
 from pandas.util.version import Version
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index b995c6ac78b80..169c9cc18a7fd 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from pandas._libs.tslibs.offsets import (
     FY5253,
     BaseOffset,