From 4b8a90a345386c9867c6f8ffe48b12c13bd97ad1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 26 Jul 2023 08:38:29 -0700 Subject: [PATCH 1/5] TYP: Enable disallowing untyped defs --- pandas/compat/__init__.py | 7 ++- pandas/core/interchange/dataframe.py | 11 ++-- pandas/io/feather_format.py | 9 ++-- pandas/io/gbq.py | 2 +- pandas/io/orc.py | 4 +- pandas/io/pickle.py | 7 ++- pyproject.toml | 76 ++++++++++++++++++++++++++-- 7 files changed, 101 insertions(+), 15 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index bfcfd5c74351a..ee20a97d69f98 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -12,7 +12,10 @@ import os import platform import sys -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Any, +) from pandas.compat._constants import ( IS64, @@ -35,7 +38,7 @@ from pandas._typing import F -def set_function_name(f: F, name: str, cls) -> F: +def set_function_name(f: F, name: str, cls: Any) -> F: """ Bind the name/qualname attributes of the function. """ diff --git a/pandas/core/interchange/dataframe.py b/pandas/core/interchange/dataframe.py index 51b6cebabc2d5..5372fd7ef3c8d 100644 --- a/pandas/core/interchange/dataframe.py +++ b/pandas/core/interchange/dataframe.py @@ -7,6 +7,11 @@ from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg if TYPE_CHECKING: + from collections.abc import ( + Iterable, + Sequence, + ) + from pandas import ( DataFrame, Index, @@ -72,7 +77,7 @@ def get_columns(self) -> list[PandasColumn]: for name in self._df.columns ] - def select_columns(self, indices) -> PandasDataFrameXchg: + def select_columns(self, indices: Sequence[int]) -> PandasDataFrameXchg: if not isinstance(indices, abc.Sequence): raise ValueError("`indices` is not a sequence") if not isinstance(indices, list): @@ -82,7 +87,7 @@ def select_columns(self, indices) -> PandasDataFrameXchg: self._df.iloc[:, indices], self._nan_as_null, self._allow_copy ) - def select_columns_by_name(self, names) -> PandasDataFrameXchg: + def select_columns_by_name(self, names: Sequence[str]) -> PandasDataFrameXchg: if not isinstance(names, abc.Sequence): raise ValueError("`names` is not a sequence") if not isinstance(names, list): @@ -92,7 +97,7 @@ def select_columns_by_name(self, names) -> PandasDataFrameXchg: self._df.loc[:, names], self._nan_as_null, self._allow_copy ) - def get_chunks(self, n_chunks: int | None = None): + def get_chunks(self, n_chunks: int | None = None) -> Iterable[PandasDataFrameXchg]: """ Return an iterator yielding the chunks. """ diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index ee479ec8b8ba7..77b2b12fda77f 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -1,7 +1,10 @@ """ feather-format compat """ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Any, +) from pandas._libs import lib from pandas.compat._optional import import_optional_dependency @@ -34,7 +37,7 @@ def to_feather( df: DataFrame, path: FilePath | WriteBuffer[bytes], storage_options: StorageOptions | None = None, - **kwargs, + **kwargs: Any, ) -> None: """ Write a DataFrame to the binary Feather format. @@ -70,7 +73,7 @@ def read_feather( use_threads: bool = True, storage_options: StorageOptions | None = None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, -): +) -> DataFrame: """ Load a feather-format object from the file path. diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 6a0105abda71d..f8825f2b3145c 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -215,7 +215,7 @@ def to_gbq( table_schema: list[dict[str, str]] | None = None, location: str | None = None, progress_bar: bool = True, - credentials=None, + credentials: Any = None, ) -> None: pandas_gbq = _try_import() pandas_gbq.to_gbq( diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 3d3d3eafcca29..5d9d439214555 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -44,8 +44,8 @@ def read_orc( path: FilePath | ReadBuffer[bytes], columns: list[str] | None = None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - filesystem=None, - **kwargs, + filesystem: Any = None, + **kwargs: Any, ) -> DataFrame: """ Load an ORC object from the file path, returning a DataFrame. diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 04e0cbc3d289d..de9f1168e40dd 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -24,6 +24,11 @@ WriteBuffer, ) + from pandas import ( + DataFrame, + Series, + ) + @doc( storage_options=_shared_docs["storage_options"], @@ -116,7 +121,7 @@ def read_pickle( filepath_or_buffer: FilePath | ReadPickleBuffer, compression: CompressionOptions = "infer", storage_options: StorageOptions | None = None, -): +) -> DataFrame | Series: """ Load pickled pandas object (or any object) from file. diff --git a/pyproject.toml b/pyproject.toml index 76cd3d21c1fcd..1945f08e369a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -534,9 +534,9 @@ disallow_any_explicit = false # TODO disallow_any_generics = false # TODO disallow_subclassing_any = false # TODO # Untyped definitions and calls -disallow_untyped_calls = false # TODO -disallow_untyped_defs = false # TODO -disallow_incomplete_defs = false # TODO +disallow_untyped_calls = true +disallow_untyped_defs = true +disallow_incomplete_defs = true check_untyped_defs = true disallow_untyped_decorators = true # None and Optional handling @@ -562,6 +562,76 @@ show_error_context = false show_column_numbers = false show_error_codes = true +[[tool.mypy.overrides]] +module = [ + "pandas._config.config", # TODO + "pandas._libs.*", + "pandas._testing.*", # TODO + "pandas.arrays", # TODO + "pandas.compat.numpy.function", # TODO + "pandas.compat._optional", # TODO + "pandas.compat.compressors", # TODO + "pandas.compat.pickle_compat", # TODO + #"pandas.core.*", + # "pandas.core.indexes.*", # TODO + "pandas.core.strings.*", # TODO + "pandas.core.tools.*", # TODO + "pandas.core.window.common", # TODO + "pandas.core.window.ewm", # TODO + "pandas.core.window.expanding", # TODO + "pandas.core.window.numba_", # TODO + "pandas.core.window.online", # TODO + "pandas.core.window.rolling", # TODO + "pandas.errors", # TODO + "pandas.io.clipboard", # TODO + "pandas.io.excel._base", # TODO + "pandas.io.excel._odfreader", # TODO + "pandas.io.excel._odswriter", # TODO + "pandas.io.excel._openpyxl", # TODO + "pandas.io.excel._pyxlsb", # TODO + "pandas.io.excel._xlrd", # TODO + "pandas.io.excel._xlsxwriter", # TODO + "pandas.io.formats.console", # TODO + "pandas.io.formats.css", # TODO + "pandas.io.formats.excel", # TODO + "pandas.io.formats.format", # TODO + "pandas.io.formats.info", # TODO + "pandas.io.formats.printing", # TODO + "pandas.io.formats.style", # TODO + "pandas.io.formats.style_render", # TODO + "pandas.io.formats.xml", # TODO + "pandas.io.json.*", # TODO + "pandas.io.parsers.*", # TODO + "pandas.io.sas.sas_xport", # TODO + "pandas.io.sas.sas7bdat", # TODO + "pandas.io.clipboards", # TODO + "pandas.io.common", # TODO + "pandas.io.gbq", # TODO + "pandas.io.html", # TODO + "pandas.io.gbq", # TODO + "pandas.io.parquet", # TODO + "pandas.io.pytables", # TODO + "pandas.io.sql", # TODO + "pandas.io.stata", # TODO + "pandas.io.xml", # TODO + "pandas.plotting.*", # TODO + "pandas.tests.*", + "pandas.tseries.frequencies", # TODO + "pandas.tseries.holiday", # TODO + "pandas.util._decorators", # TODO + "pandas.util._doctools", # TODO + "pandas.util._print_versions", # TODO + "pandas.util._test_decorators", # TODO + "pandas.util._validators", # TODO + "pandas.util", # TODO + "pandas._version", + "pandas.conftest", + "pandas" +] +disallow_untyped_calls = false +disallow_untyped_defs = false +disallow_incomplete_defs = false + [[tool.mypy.overrides]] module = [ "pandas.tests.*", From b239018401dc1d4064f6f9581b46c552244a6356 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 26 Jul 2023 14:18:42 -0700 Subject: [PATCH 2/5] finish rest of core --- pandas/core/array_algos/transforms.py | 9 +- pandas/core/dtypes/base.py | 2 +- pandas/core/interchange/buffer.py | 4 +- pyproject.toml | 153 +++++++++++++++++--------- 4 files changed, 114 insertions(+), 54 deletions(-) diff --git a/pandas/core/array_algos/transforms.py b/pandas/core/array_algos/transforms.py index 090bbc4c7be24..ec67244949e3d 100644 --- a/pandas/core/array_algos/transforms.py +++ b/pandas/core/array_algos/transforms.py @@ -9,10 +9,15 @@ import numpy as np if TYPE_CHECKING: - from pandas._typing import AxisInt + from pandas._typing import ( + AxisInt, + Scalar, + ) -def shift(values: np.ndarray, periods: int, axis: AxisInt, fill_value) -> np.ndarray: +def shift( + values: np.ndarray, periods: int, axis: AxisInt, fill_value: Scalar +) -> np.ndarray: new_values = values if periods == 0 or values.size == 0: diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index f0e55aa178ec0..83656ba1ca14a 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -398,7 +398,7 @@ class StorageExtensionDtype(ExtensionDtype): name: str _metadata = ("storage",) - def __init__(self, storage=None) -> None: + def __init__(self, storage: str | None = None) -> None: self.storage = storage def __repr__(self) -> str: diff --git a/pandas/core/interchange/buffer.py b/pandas/core/interchange/buffer.py index 0f62dd00a0f41..b31a2526a063d 100644 --- a/pandas/core/interchange/buffer.py +++ b/pandas/core/interchange/buffer.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Any + import numpy as np from pandas.core.interchange.dataframe_protocol import ( @@ -49,7 +51,7 @@ def ptr(self) -> int: """ return self._x.__array_interface__["data"][0] - def __dlpack__(self): + def __dlpack__(self) -> Any: """ Represent this structure as DLPack interface. """ diff --git a/pyproject.toml b/pyproject.toml index e21d791986756..36fb58ee396a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -568,16 +568,50 @@ show_error_codes = true [[tool.mypy.overrides]] module = [ - "pandas._config.config", # TODO + "pandas._config.config", # TODO "pandas._libs.*", - "pandas._testing.*", # TODO - "pandas.arrays", # TODO - "pandas.compat.numpy.function", # TODO - "pandas.compat._optional", # TODO - "pandas.compat.compressors", # TODO - "pandas.compat.pickle_compat", # TODO - #"pandas.core.*", - # "pandas.core.indexes.*", # TODO + "pandas._testing.*", # TODO + "pandas.arrays", # TODO + "pandas.compat.numpy.function", # TODO + "pandas.compat._optional", # TODO + "pandas.compat.compressors", # TODO + "pandas.compat.pickle_compat", # TODO + "pandas.core._numba.executor", # TODO + "pandas.core.array_algos.datetimelike_accumulations", # TODO + "pandas.core.array_algos.masked_accumulations", # TODO + "pandas.core.array_algos.masked_reductions", # TODO + "pandas.core.array_algos.putmask", # TODO + "pandas.core.array_algos.quantile", # TODO + "pandas.core.array_algos.replace", # TODO + "pandas.core.array_algos.take", # TODO + "pandas.core.arrays.*", # TODO + "pandas.core.computation.*", # TODO + "pandas.core.dtypes.astype", # TODO + "pandas.core.dtypes.cast", # TODO + "pandas.core.dtypes.common", # TODO + "pandas.core.dtypes.concat", # TODO + "pandas.core.dtypes.dtypes", # TODO + "pandas.core.dtypes.generic", # TODO + "pandas.core.dtypes.inference", # TODO + "pandas.core.dtypes.missing", # TODO + "pandas.core.groupby.categorical", # TODO + "pandas.core.groupby.generic", # TODO + "pandas.core.groupby.grouper", # TODO + "pandas.core.groupby.groupby", # TODO + "pandas.core.groupby.ops", # TODO + "pandas.core.indexers.*", # TODO + "pandas.core.indexes.*", # TODO + "pandas.core.interchange.column", # TODO + "pandas.core.interchange.dataframe_protocol", # TODO + "pandas.core.interchange.from_dataframe", # TODO + "pandas.core.internals.*", # TODO + "pandas.core.methods.*", # TODO + "pandas.core.ops.array_ops", # TODO + "pandas.core.ops.common", # TODO + "pandas.core.ops.invalid", # TODO + "pandas.core.ops.mask_ops", # TODO + "pandas.core.ops.missing", # TODO + "pandas.core.reshape.*", # TODO "pandas.core.strings.*", # TODO "pandas.core.tools.*", # TODO "pandas.core.window.common", # TODO @@ -586,48 +620,67 @@ module = [ "pandas.core.window.numba_", # TODO "pandas.core.window.online", # TODO "pandas.core.window.rolling", # TODO - "pandas.errors", # TODO - "pandas.io.clipboard", # TODO - "pandas.io.excel._base", # TODO - "pandas.io.excel._odfreader", # TODO - "pandas.io.excel._odswriter", # TODO - "pandas.io.excel._openpyxl", # TODO - "pandas.io.excel._pyxlsb", # TODO - "pandas.io.excel._xlrd", # TODO - "pandas.io.excel._xlsxwriter", # TODO - "pandas.io.formats.console", # TODO - "pandas.io.formats.css", # TODO - "pandas.io.formats.excel", # TODO - "pandas.io.formats.format", # TODO - "pandas.io.formats.info", # TODO - "pandas.io.formats.printing", # TODO - "pandas.io.formats.style", # TODO - "pandas.io.formats.style_render", # TODO - "pandas.io.formats.xml", # TODO - "pandas.io.json.*", # TODO - "pandas.io.parsers.*", # TODO - "pandas.io.sas.sas_xport", # TODO - "pandas.io.sas.sas7bdat", # TODO - "pandas.io.clipboards", # TODO - "pandas.io.common", # TODO - "pandas.io.gbq", # TODO - "pandas.io.html", # TODO - "pandas.io.gbq", # TODO - "pandas.io.parquet", # TODO - "pandas.io.pytables", # TODO - "pandas.io.sql", # TODO - "pandas.io.stata", # TODO - "pandas.io.xml", # TODO - "pandas.plotting.*", # TODO + "pandas.core.accessor", # TODO + "pandas.core.algorithms", # TODO + "pandas.core.apply", # TODO + "pandas.core.arraylike", # TODO + "pandas.core.base", # TODO + "pandas.core.common", # TODO + "pandas.core.config_init", # TODO + "pandas.core.construction", # TODO + "pandas.core.flags", # TODO + "pandas.core.frame", # TODO + "pandas.core.generic", # TODO + "pandas.core.indexing", # TODO + "pandas.core.missing", # TODO + "pandas.core.nanops", # TODO + "pandas.core.resample", # TODO + "pandas.core.roperator", # TODO + "pandas.core.sample", # TODO + "pandas.core.series", # TODO + "pandas.core.sorting", # TODO + "pandas.errors", # TODO + "pandas.io.clipboard", # TODO + "pandas.io.excel._base", # TODO + "pandas.io.excel._odfreader", # TODO + "pandas.io.excel._odswriter", # TODO + "pandas.io.excel._openpyxl", # TODO + "pandas.io.excel._pyxlsb", # TODO + "pandas.io.excel._xlrd", # TODO + "pandas.io.excel._xlsxwriter", # TODO + "pandas.io.formats.console", # TODO + "pandas.io.formats.css", # TODO + "pandas.io.formats.excel", # TODO + "pandas.io.formats.format", # TODO + "pandas.io.formats.info", # TODO + "pandas.io.formats.printing", # TODO + "pandas.io.formats.style", # TODO + "pandas.io.formats.style_render", # TODO + "pandas.io.formats.xml", # TODO + "pandas.io.json.*", # TODO + "pandas.io.parsers.*", # TODO + "pandas.io.sas.sas_xport", # TODO + "pandas.io.sas.sas7bdat", # TODO + "pandas.io.clipboards", # TODO + "pandas.io.common", # TODO + "pandas.io.gbq", # TODO + "pandas.io.html", # TODO + "pandas.io.gbq", # TODO + "pandas.io.parquet", # TODO + "pandas.io.pytables", # TODO + "pandas.io.sql", # TODO + "pandas.io.stata", # TODO + "pandas.io.xml", # TODO + "pandas.plotting.*", # TODO "pandas.tests.*", - "pandas.tseries.frequencies", # TODO - "pandas.tseries.holiday", # TODO - "pandas.util._decorators", # TODO - "pandas.util._doctools", # TODO - "pandas.util._print_versions", # TODO - "pandas.util._test_decorators", # TODO - "pandas.util._validators", # TODO - "pandas.util", # TODO + "pandas.tseries.frequencies", # TODO + "pandas.tseries.holiday", # TODO + "pandas.util._decorators", # TODO + "pandas.util._doctools", # TODO + "pandas.util._print_versions", # TODO + "pandas.util._test_decorators", # TODO + "pandas.util._validators", # TODO + "pandas.util", # TODO "pandas._version", "pandas.conftest", "pandas" From c615a8b0ef4dea7f394b9bd4cc06d952fdfe1771 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:35:42 -0700 Subject: [PATCH 3/5] add typings --- pandas/compat/__init__.py | 7 ++----- pandas/io/orc.py | 5 ++++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 62ea7ed17fb99..be0a762642e46 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -12,10 +12,7 @@ import os import platform import sys -from typing import ( - TYPE_CHECKING, - Any, -) +from typing import TYPE_CHECKING from pandas.compat._constants import ( IS64, @@ -39,7 +36,7 @@ from pandas._typing import F -def set_function_name(f: F, name: str, cls: Any) -> F: +def set_function_name(f: F, name: str, cls: type) -> F: """ Bind the name/qualname attributes of the function. """ diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 5d9d439214555..75f7f9e56439e 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -30,6 +30,9 @@ ) if TYPE_CHECKING: + import fsspec + import pyarrow.fs + from pandas._typing import ( DtypeBackend, FilePath, @@ -44,7 +47,7 @@ def read_orc( path: FilePath | ReadBuffer[bytes], columns: list[str] | None = None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - filesystem: Any = None, + filesystem: pyarrow.fs.FileSystem | fsspec.spec.AbstractFileSystem | None = None, **kwargs: Any, ) -> DataFrame: """ From 05b3a3b857d56ea325617b0f1e17bec726c85839 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:43:27 -0700 Subject: [PATCH 4/5] Install google auth for typing --- environment.yml | 1 + pandas/io/gbq.py | 4 +++- requirements-dev.txt | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index e85e55e76775b..c95f5090894fd 100644 --- a/environment.yml +++ b/environment.yml @@ -83,6 +83,7 @@ dependencies: # documentation - gitpython # obtain contributors from git for whatsnew - gitdb + - google-auth - natsort # DataFrame.sort_values doctest - numpydoc - pydata-sphinx-theme diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index f8825f2b3145c..2d7e2f7c4b492 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -9,6 +9,8 @@ from pandas.compat._optional import import_optional_dependency if TYPE_CHECKING: + import google.auth + from pandas import DataFrame @@ -33,7 +35,7 @@ def read_gbq( dialect: str | None = None, location: str | None = None, configuration: dict[str, Any] | None = None, - credentials=None, + credentials: google.auth.credentials.Credentials | None = None, use_bqstorage_api: bool | None = None, max_results: int | None = None, progress_bar_type: str | None = None, diff --git a/requirements-dev.txt b/requirements-dev.txt index 0d00d8b2fb693..bd97716c418ee 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -58,6 +58,7 @@ tokenize-rt pre-commit>=2.15.0 gitpython gitdb +google-auth natsort numpydoc pydata-sphinx-theme From 1df3c4c371ea603e713392ee3f1d82dc2c7b9f09 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 27 Jul 2023 09:24:54 -0700 Subject: [PATCH 5/5] type another credentials, list[str] --- pandas/core/interchange/dataframe.py | 2 +- pandas/io/gbq.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/interchange/dataframe.py b/pandas/core/interchange/dataframe.py index 5372fd7ef3c8d..0ddceb6b8139b 100644 --- a/pandas/core/interchange/dataframe.py +++ b/pandas/core/interchange/dataframe.py @@ -87,7 +87,7 @@ def select_columns(self, indices: Sequence[int]) -> PandasDataFrameXchg: self._df.iloc[:, indices], self._nan_as_null, self._allow_copy ) - def select_columns_by_name(self, names: Sequence[str]) -> PandasDataFrameXchg: + def select_columns_by_name(self, names: list[str]) -> PandasDataFrameXchg: # type: ignore[override] # noqa: E501 if not isinstance(names, abc.Sequence): raise ValueError("`names` is not a sequence") if not isinstance(names, list): diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 2d7e2f7c4b492..ee71f5af12d09 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -217,7 +217,7 @@ def to_gbq( table_schema: list[dict[str, str]] | None = None, location: str | None = None, progress_bar: bool = True, - credentials: Any = None, + credentials: google.auth.credentials.Credentials | None = None, ) -> None: pandas_gbq = _try_import() pandas_gbq.to_gbq(