From f1c7750e8ff75fa622d41588245467a1e775ddd7 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 22 Dec 2021 22:23:37 +0100 Subject: [PATCH 1/9] Type excel base class --- pandas/io/common.py | 3 +- pandas/io/excel/_base.py | 182 ++++++++++++++++++++-------------- pandas/io/excel/_odswriter.py | 2 +- 3 files changed, 109 insertions(+), 78 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 1adccad7899b7..720e95379a831 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -24,6 +24,7 @@ Generic, Literal, Mapping, + Sequence, TypeVar, cast, overload, @@ -168,7 +169,7 @@ def _expand_user(filepath_or_buffer: str | BaseBufferT) -> str | BaseBufferT: return filepath_or_buffer -def validate_header_arg(header) -> None: +def validate_header_arg(header: int | Sequence[int] | None) -> None: if isinstance(header, bool): raise TypeError( "Passing a bool to header is invalid. Use header=None for no header or " diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index a5db36cee4254..2907cdc853d79 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -8,7 +8,13 @@ from typing import ( IO, Any, + Callable, + Hashable, + List, + Literal, Mapping, + Sequence, + Union, cast, ) import warnings @@ -346,33 +352,33 @@ @Appender(_read_excel_doc) def read_excel( io, - sheet_name=0, - header=0, + sheet_name: str | int | list[int] | list[str] | None = 0, + header: int | Sequence[int] | None = 0, names=None, - index_col=None, + index_col: int | Sequence[int] | None = None, usecols=None, - squeeze=None, + squeeze: bool | None = None, dtype: DtypeArg | None = None, - engine=None, + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = None, converters=None, - true_values=None, - false_values=None, - skiprows=None, - nrows=None, + true_values: list[Hashable] | None = None, + false_values: list[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, na_values=None, - keep_default_na=True, - na_filter=True, - verbose=False, + keep_default_na: bool = True, + na_filter: bool = True, + verbose: bool = False, parse_dates=False, date_parser=None, - thousands=None, - decimal=".", - comment=None, - skipfooter=0, - convert_float=None, - mangle_dupe_cols=True, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, storage_options: StorageOptions = None, -): +) -> DataFrame | dict[str, DataFrame] | dict[int, DataFrame]: should_close = False if not isinstance(io, ExcelFile): @@ -466,19 +472,19 @@ def close(self): @property @abc.abstractmethod - def sheet_names(self): + def sheet_names(self) -> list[str]: pass @abc.abstractmethod - def get_sheet_by_name(self, name): + def get_sheet_by_name(self, name: str): pass @abc.abstractmethod - def get_sheet_by_index(self, index): + def get_sheet_by_index(self, index: int): pass @abc.abstractmethod - def get_sheet_data(self, sheet, convert_float): + def get_sheet_data(self, sheet, convert_float: bool): pass def raise_if_bad_sheet_by_index(self, index: int) -> None: @@ -494,27 +500,27 @@ def raise_if_bad_sheet_by_name(self, name: str) -> None: def parse( self, - sheet_name=0, - header=0, + sheet_name: str | int | list[int] | list[str] | None = 0, + header: int | Sequence[int] | None = 0, names=None, - index_col=None, + index_col: int | Sequence[int] | None = None, usecols=None, - squeeze=None, + squeeze: bool | None = None, dtype: DtypeArg | None = None, - true_values=None, - false_values=None, - skiprows=None, - nrows=None, + true_values: list[Hashable] | None = None, + false_values: list[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, na_values=None, - verbose=False, + verbose: bool = False, parse_dates=False, date_parser=None, - thousands=None, - decimal=".", - comment=None, - skipfooter=0, - convert_float=None, - mangle_dupe_cols=True, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, **kwds, ): @@ -532,17 +538,20 @@ def parse( ret_dict = False # Keep sheetname to maintain backwards compatibility. + sheets: list[int] | list[str] if isinstance(sheet_name, list): sheets = sheet_name ret_dict = True elif sheet_name is None: sheets = self.sheet_names ret_dict = True + elif isinstance(sheet_name, str): + sheets = [sheet_name] else: sheets = [sheet_name] # handle same-type duplicates. - sheets = list(dict.fromkeys(sheets).keys()) + sheets = cast(Union[List[int], List[str]], list(dict.fromkeys(sheets).keys())) output = {} @@ -565,17 +574,28 @@ def parse( output[asheetname] = DataFrame() continue - if is_list_like(header) and len(header) == 1: - header = header[0] + is_list_header = False + is_len_one_list_header = False + if is_list_like(header): + assert isinstance(header, Sequence) + is_list_header = True + if len(header) == 1: + is_len_one_list_header = True + + if is_len_one_list_header: + header = cast(Sequence[int], header)[0] # forward fill and pull out names for MultiIndex column header_names = None if header is not None and is_list_like(header): + assert isinstance(header, Sequence) + header_names = [] control_row = [True] * len(data[0]) for row in header: if is_integer(skiprows): + assert isinstance(skiprows, int) row += skiprows data[row], control_row = fill_mi_header(data[row], control_row) @@ -587,14 +607,14 @@ def parse( # If there is a MultiIndex header and an index then there is also # a row containing just the index name(s) has_index_names = ( - is_list_like(header) and len(header) > 1 and index_col is not None + is_list_header and not is_len_one_list_header and index_col is not None ) if is_list_like(index_col): # Forward fill values for MultiIndex index. if header is None: offset = 0 - elif not is_list_like(header): + elif isinstance(header, int): offset = 1 + header else: offset = 1 + max(header) @@ -608,6 +628,8 @@ def parse( # Check if we have an empty dataset # before trying to collect data. if offset < len(data): + assert isinstance(index_col, Sequence) + for col in index_col: last = data[offset][col] @@ -875,12 +897,12 @@ class ExcelWriter(metaclass=abc.ABCMeta): def __new__( cls, path: FilePath | WriteExcelBuffer | ExcelWriter, - engine=None, - date_format=None, - datetime_format=None, + engine: str | None = None, + date_format: str | None = None, + datetime_format: str | None = None, mode: str = "w", storage_options: StorageOptions = None, - if_sheet_exists: str | None = None, + if_sheet_exists: Literal["error", "new", "replace", "overlay"] | None = None, engine_kwargs: dict | None = None, **kwargs, ): @@ -937,7 +959,7 @@ def __new__( @property @abc.abstractmethod - def supported_extensions(self): + def supported_extensions(self) -> tuple[str, ...] | list[str]: """Extensions that writer engine supports.""" pass @@ -949,8 +971,13 @@ def engine(self): @abc.abstractmethod def write_cells( - self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None - ): + self, + cells, + sheet_name: str | None = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + ) -> None: """ Write given formatted cells into Excel an excel sheet @@ -968,7 +995,7 @@ def write_cells( pass @abc.abstractmethod - def save(self): + def save(self) -> None: """ Save workbook to disk. """ @@ -977,9 +1004,9 @@ def save(self): def __init__( self, path: FilePath | WriteExcelBuffer | ExcelWriter, - engine=None, - date_format=None, - datetime_format=None, + engine: str | None = None, + date_format: str | None = None, + datetime_format: str | None = None, mode: str = "w", storage_options: StorageOptions = None, if_sheet_exists: str | None = None, @@ -1034,14 +1061,14 @@ def __init__( def __fspath__(self): return getattr(self.handles.handle, "name", "") - def _get_sheet_name(self, sheet_name): + def _get_sheet_name(self, sheet_name: str | None) -> str: if sheet_name is None: sheet_name = self.cur_sheet if sheet_name is None: # pragma: no cover raise ValueError("Must pass explicit sheet_name or set cur_sheet property") return sheet_name - def _value_with_fmt(self, val): + def _value_with_fmt(self, val) -> tuple[object, str | None]: """ Convert numpy types to Python types for the Excel writers. @@ -1076,7 +1103,7 @@ def _value_with_fmt(self, val): return val, fmt @classmethod - def check_extension(cls, ext: str): + def check_extension(cls, ext: str) -> Literal[True]: """ checks that path's extension against the Writer's supported extensions. If it isn't supported, raises UnsupportedFiletypeError. @@ -1100,11 +1127,10 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def close(self): + def close(self) -> None: """synonym for save, to make it more file-like""" - content = self.save() + self.save() self.handles.close() - return content XLS_SIGNATURES = ( @@ -1243,7 +1269,10 @@ class ExcelFile: } def __init__( - self, path_or_buffer, engine=None, storage_options: StorageOptions = None + self, + path_or_buffer, + engine: str | None = None, + storage_options: StorageOptions = None, ): if engine is not None and engine not in self._engines: raise ValueError(f"Unknown engine: {engine}") @@ -1310,6 +1339,7 @@ def __init__( stacklevel=stacklevel, ) + assert engine is not None self.engine = engine self.storage_options = storage_options @@ -1320,27 +1350,27 @@ def __fspath__(self): def parse( self, - sheet_name=0, - header=0, + sheet_name: str | int | list[int] | list[str] | None = 0, + header: int | Sequence[int] | None = 0, names=None, - index_col=None, + index_col: int | Sequence[int] | None = None, usecols=None, - squeeze=None, + squeeze: bool | None = None, converters=None, - true_values=None, - false_values=None, - skiprows=None, - nrows=None, + true_values: list[Hashable] | None = None, + false_values: list[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, na_values=None, parse_dates=False, date_parser=None, - thousands=None, - comment=None, - skipfooter=0, - convert_float=None, - mangle_dupe_cols=True, + thousands: str | None = None, + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, **kwds, - ): + ) -> DataFrame | dict[str, DataFrame] | dict[int, DataFrame]: """ Parse specified sheet(s) into a DataFrame. @@ -1383,7 +1413,7 @@ def book(self): def sheet_names(self): return self._reader.sheet_names - def close(self): + def close(self) -> None: """close io if necessary""" self._reader.close() diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index add95c58cd809..d4fe3683c907e 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -140,7 +140,7 @@ def _make_table_cell_attributes(self, cell) -> dict[str, int | str]: attributes["numbercolumnsspanned"] = cell.mergeend return attributes - def _make_table_cell(self, cell) -> tuple[str, Any]: + def _make_table_cell(self, cell) -> tuple[object, Any]: """Convert cell data to an OpenDocument spreadsheet cell Parameters From 48176f0389c77cb827e5e6ed323c84e45b050b30 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 29 Dec 2021 15:33:15 +0100 Subject: [PATCH 2/9] Fix optional issue --- pandas/io/excel/_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 26a5f9545e396..c3ea96693b5ef 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -950,6 +950,8 @@ def __new__( stacklevel=find_stack_level(), ) + # for mypy + assert engine is not None cls = get_writer(engine) return object.__new__(cls) From a710cd7389c6a99e3e6628b770389147fbbfdfb4 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 29 Dec 2021 15:52:00 +0100 Subject: [PATCH 3/9] Generalize --- pandas/io/excel/_base.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c3ea96693b5ef..38a62d558c51a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -10,6 +10,7 @@ Any, Callable, Hashable, + Iterable, List, Literal, Mapping, @@ -361,8 +362,8 @@ def read_excel( dtype: DtypeArg | None = None, engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = None, converters=None, - true_values: list[Hashable] | None = None, - false_values: list[Hashable] | None = None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, skiprows: Sequence[int] | int | Callable[[int], object] | None = None, nrows: int | None = None, na_values=None, @@ -507,8 +508,8 @@ def parse( usecols=None, squeeze: bool | None = None, dtype: DtypeArg | None = None, - true_values: list[Hashable] | None = None, - false_values: list[Hashable] | None = None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, skiprows: Sequence[int] | int | Callable[[int], object] | None = None, nrows: int | None = None, na_values=None, @@ -1359,8 +1360,8 @@ def parse( usecols=None, squeeze: bool | None = None, converters=None, - true_values: list[Hashable] | None = None, - false_values: list[Hashable] | None = None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, skiprows: Sequence[int] | int | Callable[[int], object] | None = None, nrows: int | None = None, na_values=None, From 3e5407f520ac11785d7b670a648ca4124aacd16f Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 29 Dec 2021 16:52:35 +0100 Subject: [PATCH 4/9] Revert type --- pandas/io/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index d3c7a037b748b..eaf6f6475ec84 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -26,7 +26,6 @@ Generic, Literal, Mapping, - Sequence, TypeVar, cast, overload, @@ -175,7 +174,7 @@ def _expand_user(filepath_or_buffer: str | BaseBufferT) -> str | BaseBufferT: return filepath_or_buffer -def validate_header_arg(header: int | Sequence[int] | None) -> None: +def validate_header_arg(header: object) -> None: if isinstance(header, bool): raise TypeError( "Passing a bool to header is invalid. Use header=None for no header or " From 8fd8b1fad5a1656999b7364565638d686ae9671d Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 29 Dec 2021 17:52:57 +0100 Subject: [PATCH 5/9] Add overloads --- pandas/_typing.py | 1 + pandas/io/excel/_base.py | 72 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index eb5bb30238893..159d57fb27c89 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -84,6 +84,7 @@ DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] Scalar = Union[PythonScalar, PandasScalar] +IntStrT = TypeVar("IntStrT", int, str) # timestamp and timedelta convertible types diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 38a62d558c51a..661c9c8b30704 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -17,6 +17,7 @@ Sequence, Union, cast, + overload, ) import warnings import zipfile @@ -27,6 +28,7 @@ from pandas._typing import ( DtypeArg, FilePath, + IntStrT, ReadBuffer, StorageOptions, WriteExcelBuffer, @@ -349,11 +351,77 @@ ) +@overload +def read_excel( + io, + sheet_name: str | int, + header: int | Sequence[int] | None = ..., + names=..., + index_col: int | Sequence[int] | None = ..., + usecols=..., + squeeze: bool | None = ..., + dtype: DtypeArg | None = ..., + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., + converters=..., + true_values: Iterable[Hashable] | None = ..., + false_values: Iterable[Hashable] | None = ..., + skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + parse_dates=..., + date_parser=..., + thousands: str | None = ..., + decimal: str = ..., + comment: str | None = ..., + skipfooter: int = ..., + convert_float: bool | None = ..., + mangle_dupe_cols: bool = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: + ... + + +@overload +def read_excel( + io, + sheet_name: list[IntStrT] | None, + header: int | Sequence[int] | None = ..., + names=..., + index_col: int | Sequence[int] | None = ..., + usecols=..., + squeeze: bool | None = ..., + dtype: DtypeArg | None = ..., + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., + converters=..., + true_values: Iterable[Hashable] | None = ..., + false_values: Iterable[Hashable] | None = ..., + skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + parse_dates=..., + date_parser=..., + thousands: str | None = ..., + decimal: str = ..., + comment: str | None = ..., + skipfooter: int = ..., + convert_float: bool | None = ..., + mangle_dupe_cols: bool = ..., + storage_options: StorageOptions = ..., +) -> dict[IntStrT, DataFrame]: + ... + + @deprecate_nonkeyword_arguments(allowed_args=["io", "sheet_name"], version="2.0") @Appender(_read_excel_doc) def read_excel( io, - sheet_name: str | int | list[int] | list[str] | None = 0, + sheet_name: str | int | list[IntStrT] | None = 0, header: int | Sequence[int] | None = 0, names=None, index_col: int | Sequence[int] | None = None, @@ -379,7 +447,7 @@ def read_excel( convert_float: bool | None = None, mangle_dupe_cols: bool = True, storage_options: StorageOptions = None, -) -> DataFrame | dict[str, DataFrame] | dict[int, DataFrame]: +) -> DataFrame | dict[IntStrT, DataFrame]: should_close = False if not isinstance(io, ExcelFile): From ceb17c68d1a3460ea6c1ff9b4acd4fb52f2b9c66 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 29 Dec 2021 21:52:47 +0100 Subject: [PATCH 6/9] Remove typevar --- pandas/_typing.py | 1 - pandas/io/excel/_base.py | 42 +++++++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 159d57fb27c89..eb5bb30238893 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -84,7 +84,6 @@ DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] Scalar = Union[PythonScalar, PandasScalar] -IntStrT = TypeVar("IntStrT", int, str) # timestamp and timedelta convertible types diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 661c9c8b30704..b881249e7544d 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -28,7 +28,6 @@ from pandas._typing import ( DtypeArg, FilePath, - IntStrT, ReadBuffer, StorageOptions, WriteExcelBuffer, @@ -387,7 +386,7 @@ def read_excel( @overload def read_excel( io, - sheet_name: list[IntStrT] | None, + sheet_name: list[int], header: int | Sequence[int] | None = ..., names=..., index_col: int | Sequence[int] | None = ..., @@ -413,7 +412,40 @@ def read_excel( convert_float: bool | None = ..., mangle_dupe_cols: bool = ..., storage_options: StorageOptions = ..., -) -> dict[IntStrT, DataFrame]: +) -> dict[int, DataFrame]: + ... + + +@overload +def read_excel( + io, + sheet_name: list[str] | None, + header: int | Sequence[int] | None = ..., + names=..., + index_col: int | Sequence[int] | None = ..., + usecols=..., + squeeze: bool | None = ..., + dtype: DtypeArg | None = ..., + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., + converters=..., + true_values: Iterable[Hashable] | None = ..., + false_values: Iterable[Hashable] | None = ..., + skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + parse_dates=..., + date_parser=..., + thousands: str | None = ..., + decimal: str = ..., + comment: str | None = ..., + skipfooter: int = ..., + convert_float: bool | None = ..., + mangle_dupe_cols: bool = ..., + storage_options: StorageOptions = ..., +) -> dict[str, DataFrame]: ... @@ -421,7 +453,7 @@ def read_excel( @Appender(_read_excel_doc) def read_excel( io, - sheet_name: str | int | list[IntStrT] | None = 0, + sheet_name: str | int | list[int] | list[str] | None = 0, header: int | Sequence[int] | None = 0, names=None, index_col: int | Sequence[int] | None = None, @@ -447,7 +479,7 @@ def read_excel( convert_float: bool | None = None, mangle_dupe_cols: bool = True, storage_options: StorageOptions = None, -) -> DataFrame | dict[IntStrT, DataFrame]: +) -> DataFrame | dict[int, DataFrame] | dict[str, DataFrame]: should_close = False if not isinstance(io, ExcelFile): From 8fdd3d2b044fad37a55f87ddd6a41cf2e29b7ddb Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 30 Dec 2021 16:06:46 +0100 Subject: [PATCH 7/9] Revert "Remove typevar" This reverts commit ceb17c68d1a3460ea6c1ff9b4acd4fb52f2b9c66. --- pandas/_typing.py | 1 + pandas/io/excel/_base.py | 42 +++++----------------------------------- 2 files changed, 6 insertions(+), 37 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index eb5bb30238893..159d57fb27c89 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -84,6 +84,7 @@ DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] Scalar = Union[PythonScalar, PandasScalar] +IntStrT = TypeVar("IntStrT", int, str) # timestamp and timedelta convertible types diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index b881249e7544d..661c9c8b30704 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -28,6 +28,7 @@ from pandas._typing import ( DtypeArg, FilePath, + IntStrT, ReadBuffer, StorageOptions, WriteExcelBuffer, @@ -386,7 +387,7 @@ def read_excel( @overload def read_excel( io, - sheet_name: list[int], + sheet_name: list[IntStrT] | None, header: int | Sequence[int] | None = ..., names=..., index_col: int | Sequence[int] | None = ..., @@ -412,40 +413,7 @@ def read_excel( convert_float: bool | None = ..., mangle_dupe_cols: bool = ..., storage_options: StorageOptions = ..., -) -> dict[int, DataFrame]: - ... - - -@overload -def read_excel( - io, - sheet_name: list[str] | None, - header: int | Sequence[int] | None = ..., - names=..., - index_col: int | Sequence[int] | None = ..., - usecols=..., - squeeze: bool | None = ..., - dtype: DtypeArg | None = ..., - engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., - converters=..., - true_values: Iterable[Hashable] | None = ..., - false_values: Iterable[Hashable] | None = ..., - skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - parse_dates=..., - date_parser=..., - thousands: str | None = ..., - decimal: str = ..., - comment: str | None = ..., - skipfooter: int = ..., - convert_float: bool | None = ..., - mangle_dupe_cols: bool = ..., - storage_options: StorageOptions = ..., -) -> dict[str, DataFrame]: +) -> dict[IntStrT, DataFrame]: ... @@ -453,7 +421,7 @@ def read_excel( @Appender(_read_excel_doc) def read_excel( io, - sheet_name: str | int | list[int] | list[str] | None = 0, + sheet_name: str | int | list[IntStrT] | None = 0, header: int | Sequence[int] | None = 0, names=None, index_col: int | Sequence[int] | None = None, @@ -479,7 +447,7 @@ def read_excel( convert_float: bool | None = None, mangle_dupe_cols: bool = True, storage_options: StorageOptions = None, -) -> DataFrame | dict[int, DataFrame] | dict[str, DataFrame]: +) -> DataFrame | dict[IntStrT, DataFrame]: should_close = False if not isinstance(io, ExcelFile): From c751e8456084110ab6d7d84bd7ae9b458e3aa9f3 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 30 Dec 2021 16:10:01 +0100 Subject: [PATCH 8/9] Add comments --- pandas/io/excel/_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 661c9c8b30704..b490244f7f396 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -354,6 +354,7 @@ @overload def read_excel( io, + # sheet name is str or int -> DataFrame sheet_name: str | int, header: int | Sequence[int] | None = ..., names=..., @@ -387,6 +388,7 @@ def read_excel( @overload def read_excel( io, + # sheet name is list or None -> dict[IntStrT, DataFrame] sheet_name: list[IntStrT] | None, header: int | Sequence[int] | None = ..., names=..., From 77d4d228877204c385fba980f5785026553f32f4 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 30 Dec 2021 16:31:55 +0100 Subject: [PATCH 9/9] Bump pyright --- .github/workflows/code-checks.yml | 2 +- .pre-commit-config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 3a84a75be838f..634688d65e117 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -74,7 +74,7 @@ jobs: - name: Install pyright # note: keep version in sync with .pre-commit-config.yaml - run: npm install -g pyright@1.1.200 + run: npm install -g pyright@1.1.202 - name: Build Pandas id: build diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19a8a127fa1a5..e3c9be941498f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -78,7 +78,7 @@ repos: types: [python] stages: [manual] # note: keep version in sync with .github/workflows/ci.yml - additional_dependencies: ['pyright@1.1.200'] + additional_dependencies: ['pyright@1.1.202'] - repo: local hooks: - id: flake8-rst