From 88f93f92b8fba2e428233e8997eb153248c4f2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 2 Sep 2023 20:50:50 -0400 Subject: [PATCH 1/6] TYP: fix a few types --- pandas/__init__.py | 2 ++ pandas/core/frame.py | 24 ++++++++++++++++-------- pandas/core/generic.py | 7 +++++-- pandas/io/excel/_base.py | 31 ++++++++++++------------------- pandas/io/formats/excel.py | 4 +--- 5 files changed, 36 insertions(+), 32 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index d11a429987ac4..6e4fb8b5363f0 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -110,6 +110,7 @@ from pandas.core.dtypes.dtypes import SparseDtype +from pandas import util from pandas.tseries.api import infer_freq from pandas.tseries import offsets @@ -348,6 +349,7 @@ "to_timedelta", "tseries", "unique", + "util", "value_counts", "wide_to_long", ] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4bfa8a4415785..a731cdbf99b0e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1926,11 +1926,17 @@ def to_dict( self, orient: Literal["dict", "list", "series", "split", "tight", "index"] = ..., into: type[dict] = ..., + index: bool = ..., ) -> dict: ... @overload - def to_dict(self, orient: Literal["records"], into: type[dict] = ...) -> list[dict]: + def to_dict( + self, + orient: Literal["records"], + into: type[dict] = ..., + index: bool = ..., + ) -> list[dict]: ... @deprecate_nonkeyword_arguments( @@ -11297,7 +11303,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: def any( # type: ignore[override] self, *, - axis: Axis = 0, + axis: Axis | None = 0, bool_only: bool = False, skipna: bool = True, **kwargs, @@ -11312,7 +11318,7 @@ def any( # type: ignore[override] @doc(make_doc("all", ndim=2)) def all( self, - axis: Axis = 0, + axis: Axis | None = 0, bool_only: bool = False, skipna: bool = True, **kwargs, @@ -11711,6 +11717,7 @@ def quantile( axis: Axis = ..., numeric_only: bool = ..., interpolation: QuantileInterpolation = ..., + method: Literal["single", "table"] = ..., ) -> Series: ... @@ -11721,6 +11728,7 @@ def quantile( axis: Axis = ..., numeric_only: bool = ..., interpolation: QuantileInterpolation = ..., + method: Literal["single", "table"] = ..., ) -> Series | DataFrame: ... @@ -11731,6 +11739,7 @@ def quantile( axis: Axis = ..., numeric_only: bool = ..., interpolation: QuantileInterpolation = ..., + method: Literal["single", "table"] = ..., ) -> Series | DataFrame: ... @@ -11830,11 +11839,10 @@ def quantile( if not is_list_like(q): # BlockManager.quantile expects listlike, so we wrap and unwrap here - # error: List item 0 has incompatible type "Union[float, Union[Union[ - # ExtensionArray, ndarray[Any, Any]], Index, Series], Sequence[float]]"; - # expected "float" - res_df = self.quantile( # type: ignore[call-overload] - [q], + # error: List item 0 has incompatible type "float | ExtensionArray | + # ndarray[Any, Any] | Index | Series | Sequence[float]"; expected "float" + res_df = self.quantile( + [q], # type: ignore[list-item] axis=axis, numeric_only=numeric_only, interpolation=interpolation, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b9407ebe6624a..fdbf76ce68377 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11743,7 +11743,7 @@ def _logical_func( self, name: str, func, - axis: Axis = 0, + axis: Axis | None = 0, bool_only: bool_t = False, skipna: bool_t = True, **kwargs, @@ -11756,7 +11756,10 @@ def _logical_func( res = self._logical_func( name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs ) - return res._logical_func(name, func, skipna=skipna, **kwargs) + # error: Item "bool" of "Series | bool" has no attribute "_logical_func" + return res._logical_func( # type: ignore[union-attr] + name, func, skipna=skipna, **kwargs + ) elif axis is None: axis = 0 diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 9ffbfb9f1149f..b4b0f29019c31 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1,6 +1,5 @@ from __future__ import annotations -import abc from collections.abc import ( Hashable, Iterable, @@ -549,7 +548,7 @@ def read_excel( _WorkbookT = TypeVar("_WorkbookT") -class BaseExcelReader(Generic[_WorkbookT], metaclass=abc.ABCMeta): +class BaseExcelReader(Generic[_WorkbookT]): book: _WorkbookT def __init__( @@ -589,13 +588,11 @@ def __init__( ) @property - @abc.abstractmethod def _workbook_class(self) -> type[_WorkbookT]: - pass + raise NotImplementedError - @abc.abstractmethod def load_workbook(self, filepath_or_buffer, engine_kwargs) -> _WorkbookT: - pass + raise NotImplementedError def close(self) -> None: if hasattr(self, "book"): @@ -611,21 +608,17 @@ def close(self) -> None: self.handles.close() @property - @abc.abstractmethod def sheet_names(self) -> list[str]: - pass + raise NotImplementedError - @abc.abstractmethod def get_sheet_by_name(self, name: str): - pass + raise NotImplementedError - @abc.abstractmethod def get_sheet_by_index(self, index: int): - pass + raise NotImplementedError - @abc.abstractmethod def get_sheet_data(self, sheet, rows: int | None = None): - pass + raise NotImplementedError def raise_if_bad_sheet_by_index(self, index: int) -> None: n_sheets = len(self.sheet_names) @@ -940,7 +933,7 @@ def parse( @doc(storage_options=_shared_docs["storage_options"]) -class ExcelWriter(Generic[_WorkbookT], metaclass=abc.ABCMeta): +class ExcelWriter(Generic[_WorkbookT]): """ Class for writing DataFrame objects into excel sheets. @@ -1178,20 +1171,19 @@ def engine(self) -> str: return self._engine @property - @abc.abstractmethod def sheets(self) -> dict[str, Any]: """Mapping of sheet names to sheet objects.""" + raise NotImplementedError @property - @abc.abstractmethod def book(self) -> _WorkbookT: """ Book instance. Class type will depend on the engine used. This attribute can be used to access engine-specific features. """ + raise NotImplementedError - @abc.abstractmethod def _write_cells( self, cells, @@ -1214,12 +1206,13 @@ def _write_cells( freeze_panes: int tuple of length 2 contains the bottom-most row and right-most column to freeze """ + raise NotImplementedError - @abc.abstractmethod def _save(self) -> None: """ Save workbook to disk. """ + raise NotImplementedError def __init__( self, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 9970d465ced9d..b344d9849f16c 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -941,9 +941,7 @@ def write( if isinstance(writer, ExcelWriter): need_save = False else: - # error: Cannot instantiate abstract class 'ExcelWriter' with abstract - # attributes 'engine', 'save', 'supported_extensions' and 'write_cells' - writer = ExcelWriter( # type: ignore[abstract] + writer = ExcelWriter( writer, engine=engine, storage_options=storage_options, From 0f72079f229db7e243784ee65c2e968db5f7e2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 2 Sep 2023 23:14:34 -0400 Subject: [PATCH 2/6] namespace test --- pandas/tests/api/test_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 60bcb97aaa364..67af3f92305f4 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -46,8 +46,9 @@ class TestPDApi(Base): "plotting", "io", "tseries", + "util", ] - private_lib = ["compat", "core", "pandas", "util", "_built_with_meson"] + private_lib = ["compat", "core", "pandas", "_built_with_meson"] # misc misc = ["IndexSlice", "NaT", "NA"] From 4f03cc2eeae6cce5d39f88b58a67e29a1a00a2f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 3 Sep 2023 19:45:03 -0400 Subject: [PATCH 3/6] read_fwf overloads --- pandas/io/json/_json.py | 5 ++-- pandas/io/parsers/readers.py | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 833f4986b6da6..6ffb291a54a05 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -84,6 +84,7 @@ ReadBuffer, StorageOptions, WriteBuffer, + Self, ) from pandas.core.generic import NDFrame @@ -1056,7 +1057,7 @@ def close(self) -> None: if self.handles is not None: self.handles.close() - def __iter__(self: JsonReader[FrameSeriesStrT]) -> JsonReader[FrameSeriesStrT]: + def __iter__(self) -> Self: return self @overload @@ -1099,7 +1100,7 @@ def __next__(self) -> DataFrame | Series: else: return obj - def __enter__(self) -> JsonReader[FrameSeriesStrT]: + def __enter__(self) -> Self: return self def __exit__( diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 10d3ab230cb9d..ff6ef10762246 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1307,6 +1307,51 @@ def read_table( return _read(filepath_or_buffer, kwds) +@overload +def read_fwf( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + colspecs: Sequence[tuple[int, int]] | str | None = ..., + widths: Sequence[int] | None = ..., + infer_nrows: int = ..., + dtype_backend: DtypeBackend | lib.NoDefault = ..., + iterator: Literal[True], + chunksize: int | None = ..., + **kwds, +) -> TextFileReader: + ... + + +@overload +def read_fwf( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + colspecs: Sequence[tuple[int, int]] | str | None = ..., + widths: Sequence[int] | None = ..., + infer_nrows: int = ..., + dtype_backend: DtypeBackend | lib.NoDefault = ..., + iterator: bool = ..., + chunksize: int, + **kwds, +) -> TextFileReader: + ... + + +@overload +def read_fwf( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + colspecs: Sequence[tuple[int, int]] | str | None = ..., + widths: Sequence[int] | None = ..., + infer_nrows: int = ..., + dtype_backend: DtypeBackend | lib.NoDefault = ..., + iterator: Literal[False] = ..., + chunksize: None = ..., + **kwds, +) -> DataFrame: + ... + + def read_fwf( filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], *, @@ -1314,6 +1359,8 @@ def read_fwf( widths: Sequence[int] | None = None, infer_nrows: int = 100, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, + iterator: bool = False, + chunksize: int | None = None, **kwds, ) -> DataFrame | TextFileReader: r""" @@ -1374,6 +1421,9 @@ def read_fwf( -------- >>> pd.read_fwf('data.csv') # doctest: +SKIP """ + kwds["iterator"] = iterator + kwds["chunksize"] = chunksize + # Check input arguments. if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") From 9d9aefc0c16b722d551e633ccb049b84ba6de2d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 5 Sep 2023 19:38:26 -0400 Subject: [PATCH 4/6] Revert "namespace test" This reverts commit 0f72079f229db7e243784ee65c2e968db5f7e2ff. --- pandas/tests/api/test_api.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 67af3f92305f4..60bcb97aaa364 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -46,9 +46,8 @@ class TestPDApi(Base): "plotting", "io", "tseries", - "util", ] - private_lib = ["compat", "core", "pandas", "_built_with_meson"] + private_lib = ["compat", "core", "pandas", "util", "_built_with_meson"] # misc misc = ["IndexSlice", "NaT", "NA"] From bb379a5bca6cc8f348fe28aded2c9ba2a8ce2208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 5 Sep 2023 19:42:33 -0400 Subject: [PATCH 5/6] revert util and move kwds --- pandas/__init__.py | 2 -- pandas/io/parsers/readers.py | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 6e4fb8b5363f0..d11a429987ac4 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -110,7 +110,6 @@ from pandas.core.dtypes.dtypes import SparseDtype -from pandas import util from pandas.tseries.api import infer_freq from pandas.tseries import offsets @@ -349,7 +348,6 @@ "to_timedelta", "tseries", "unique", - "util", "value_counts", "wide_to_long", ] diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index ff6ef10762246..e0f171035e89e 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1421,9 +1421,6 @@ def read_fwf( -------- >>> pd.read_fwf('data.csv') # doctest: +SKIP """ - kwds["iterator"] = iterator - kwds["chunksize"] = chunksize - # Check input arguments. if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") @@ -1462,6 +1459,8 @@ def read_fwf( kwds["colspecs"] = colspecs kwds["infer_nrows"] = infer_nrows kwds["engine"] = "python-fwf" + kwds["iterator"] = iterator + kwds["chunksize"] = chunksize check_dtype_backend(dtype_backend) kwds["dtype_backend"] = dtype_backend From b3a2adae6463cd15033416222b2cae544e94376d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 5 Sep 2023 23:04:28 -0400 Subject: [PATCH 6/6] isort --- pandas/io/json/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 6ffb291a54a05..52ea072d1483f 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -82,9 +82,9 @@ JSONEngine, JSONSerializable, ReadBuffer, + Self, StorageOptions, WriteBuffer, - Self, ) from pandas.core.generic import NDFrame