From 098cb6b40431b8934ad69fd2d42224e7e4b1e7e5 Mon Sep 17 00:00:00 2001 From: Pacific <12509045+pacificdragon@users.noreply.github.com> Date: Sun, 12 Feb 2023 01:14:26 +0530 Subject: [PATCH 1/6] Add Deprecation FutureWarning for parse function call --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/io/excel/_base.py | 12 +++++++++++- pandas/tests/io/excel/test_readers.py | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 410690de1a1ec..9542d82c75e8a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1303,6 +1303,7 @@ I/O - Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`) - Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`) - Bug in :func:`read_xml` where file-like objects failed when iterparse is used (:issue:`50641`) +- Bug in :func:`read_excel` where passing invalid argument name ``headers`` to :meth:`parse` doesn't raise error (:issue:`50953`) Period ^^^^^^ diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 79d174db5c0a7..7439642b31bfd 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -21,6 +21,7 @@ cast, overload, ) +import warnings import zipfile from pandas._config import ( @@ -47,6 +48,7 @@ Appender, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_bool, @@ -1552,12 +1554,20 @@ def parse( Equivalent to read_excel(ExcelFile, ...) See the read_excel docstring for more info on accepted parameters. - + .. deprecated:: 2.0.0 + Arguments other than sheet_name by position may not work. Returns ------- DataFrame or dict of DataFrames DataFrame from the passed in Excel file. """ + if kwds: + warnings.warn( + f"{type(self).__name__}.parse is deprecated. " + "Arguments other than sheet_name by position may not work.", + FutureWarning, + stacklevel=find_stack_level(), + ) return self._reader.parse( sheet_name=sheet_name, header=header, diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 3f2fecbfb48a6..df5d59e0c70f8 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1684,3 +1684,11 @@ def test_corrupt_files_closed(self, engine, read_ext): pd.ExcelFile(file, engine=engine) except errors: pass + + def test_read_excel_parse_warning(self, read_ext): + # GH50953 + msg = "Arguments other than sheet_name by position may not work." + with tm.assert_produces_warning(FutureWarning, match=msg): + with pd.ExcelFile("test1" + read_ext) as excel: + excel.parse("Sheet1", headers=[0, 1, 2]) + # invalid argument 'headers' should give warning for deprecation From e5b60930414cdeb8995207bdbaba6c25b37fa5ad Mon Sep 17 00:00:00 2001 From: Pacific <12509045+pacificdragon@users.noreply.github.com> Date: Sun, 12 Feb 2023 18:05:26 +0530 Subject: [PATCH 2/6] Adding list of allowed args and correcting doc indentation --- pandas/io/excel/_base.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 7439642b31bfd..6e9723a4823dc 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1554,14 +1554,44 @@ def parse( Equivalent to read_excel(ExcelFile, ...) See the read_excel docstring for more info on accepted parameters. + .. deprecated:: 2.0.0 - Arguments other than sheet_name by position may not work. + Arguments other than sheet_name by position may not work. + Returns ------- DataFrame or dict of DataFrames DataFrame from the passed in Excel file. """ - if kwds: + arguments = list(kwds.keys()) + allowed_kwargs = [ + "sheet_name", + "header", + "names", + "index_col", + "usecols", + "squeeze", + "dtype", + "engine", + "converters", + "true_values", + "false_values", + "skiprows", + "nrows", + "na_values", + "keep_default_na", + "na_filter", + "verbose", + "parse_dates", + "date_parser", + "thousands", + "decimal", + "comment", + "skipfooter", + "convert_float", + ] + # Check for any invalid kwargs + if [argument for argument in arguments if argument not in allowed_kwargs]: warnings.warn( f"{type(self).__name__}.parse is deprecated. " "Arguments other than sheet_name by position may not work.", From e497e48b701ca57ed067e880c8a50c9b5ce4c16f Mon Sep 17 00:00:00 2001 From: Pacific <12509045+pacificdragon@users.noreply.github.com> Date: Thu, 16 Feb 2023 01:02:32 +0530 Subject: [PATCH 3/6] Fix deprecation warning location in doc string --- pandas/io/excel/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 6e9723a4823dc..15e856c4d0ac1 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1552,12 +1552,12 @@ def parse( """ Parse specified sheet(s) into a DataFrame. - Equivalent to read_excel(ExcelFile, ...) See the read_excel - docstring for more info on accepted parameters. - .. deprecated:: 2.0.0 Arguments other than sheet_name by position may not work. + Equivalent to read_excel(ExcelFile, ...) See the read_excel + docstring for more info on accepted parameters. + Returns ------- DataFrame or dict of DataFrames From e989751e4ed3df976d269da1e3f8de0e4531e580 Mon Sep 17 00:00:00 2001 From: Pacific <12509045+pacificdragon@users.noreply.github.com> Date: Thu, 16 Feb 2023 23:52:54 +0530 Subject: [PATCH 4/6] Update v2.0.0.rst --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index abc03c038c5d0..d6f8ad932c859 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1328,6 +1328,7 @@ I/O - Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`) - Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`) - Bug in :func:`read_xml` where file-like objects failed when iterparse is used (:issue:`50641`) +- Bug in :func:`read_xml` ignored repeated elements when iterparse is used (:issue:`51183`) - Bug in :func:`read_excel` where passing invalid argument name ``headers`` to :meth:`parse` doesn't raise error (:issue:`50953`) Period From 0fb4f503ec54ad56e9c5a9786605487b879fbe90 Mon Sep 17 00:00:00 2001 From: Pacific <12509045+pacificdragon@users.noreply.github.com> Date: Mon, 27 Feb 2023 15:37:29 +0000 Subject: [PATCH 5/6] Align with requested changes --- pandas/io/excel/_base.py | 47 ++++++---------------------------------- 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index bdfc3f529a3cf..15a34e116bec0 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1530,30 +1530,15 @@ def __fspath__(self): def parse( self, sheet_name: str | int | list[int] | list[str] | None = 0, - header: int | Sequence[int] | None = 0, - names=None, - index_col: int | Sequence[int] | None = None, - usecols=None, - converters=None, - true_values: Iterable[Hashable] | None = None, - false_values: Iterable[Hashable] | None = None, - skiprows: Sequence[int] | int | Callable[[int], object] | None = None, - nrows: int | None = None, - na_values=None, - parse_dates: list | dict | bool = False, - date_parser: Callable | lib.NoDefault = lib.no_default, - date_format: str | dict[Hashable, str] | None = None, - thousands: str | None = None, - comment: str | None = None, - skipfooter: int = 0, - use_nullable_dtypes: bool = False, + *args, **kwds, ) -> DataFrame | dict[str, DataFrame] | dict[int, DataFrame]: """ Parse specified sheet(s) into a DataFrame. .. deprecated:: 2.0.0 - Arguments other than sheet_name by position may not work. + Specifying arguments other than sheet_name by position is deprecated. + Specify arguments by keyword name instead. Equivalent to read_excel(ExcelFile, ...) See the read_excel docstring for more info on accepted parameters. @@ -1563,7 +1548,6 @@ def parse( DataFrame or dict of DataFrames DataFrame from the passed in Excel file. """ - arguments = list(kwds.keys()) allowed_kwargs = [ "sheet_name", "header", @@ -1591,32 +1575,15 @@ def parse( "convert_float", ] # Check for any invalid kwargs - if [argument for argument in arguments if argument not in allowed_kwargs]: + if any(key in allowed_kwargs for key in kwds): warnings.warn( - f"{type(self).__name__}.parse is deprecated. " - "Arguments other than sheet_name by position may not work.", + "Specifying arguments other than sheet_name by position is deprecated." + "Specify arguments by keyword name instead.", FutureWarning, stacklevel=find_stack_level(), ) return self._reader.parse( - sheet_name=sheet_name, - header=header, - names=names, - index_col=index_col, - usecols=usecols, - converters=converters, - true_values=true_values, - false_values=false_values, - skiprows=skiprows, - nrows=nrows, - na_values=na_values, - parse_dates=parse_dates, - date_parser=date_parser, - date_format=date_format, - thousands=thousands, - comment=comment, - skipfooter=skipfooter, - use_nullable_dtypes=use_nullable_dtypes, + *args, **kwds, ) From c6d78139220cabdb14c2b5cb60dd3ee704094427 Mon Sep 17 00:00:00 2001 From: Pacific <12509045+pacificdragon@users.noreply.github.com> Date: Mon, 27 Feb 2023 16:44:55 +0000 Subject: [PATCH 6/6] changed unit test --- pandas/io/excel/_base.py | 3 +-- pandas/tests/io/excel/test_readers.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 15a34e116bec0..c53fe4d37c11d 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1549,7 +1549,6 @@ def parse( DataFrame from the passed in Excel file. """ allowed_kwargs = [ - "sheet_name", "header", "names", "index_col", @@ -1574,7 +1573,6 @@ def parse( "skipfooter", "convert_float", ] - # Check for any invalid kwargs if any(key in allowed_kwargs for key in kwds): warnings.warn( "Specifying arguments other than sheet_name by position is deprecated." @@ -1583,6 +1581,7 @@ def parse( stacklevel=find_stack_level(), ) return self._reader.parse( + sheet_name, *args, **kwds, ) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 42e76afb20caa..88169c9011c2f 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1679,8 +1679,8 @@ def test_corrupt_files_closed(self, engine, read_ext): def test_read_excel_parse_warning(self, read_ext): # GH50953 - msg = "Arguments other than sheet_name by position may not work." + msg = "Specifying arguments other than sheet_name by position is deprecated." + "Specify arguments by keyword name instead." with tm.assert_produces_warning(FutureWarning, match=msg): with pd.ExcelFile("test1" + read_ext) as excel: - excel.parse("Sheet1", headers=[0, 1, 2]) - # invalid argument 'headers' should give warning for deprecation + excel.parse("Sheet1", header=[0, 1, 2])