From 9e52f9ff200353c221551d4595def341852688e6 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 18 Aug 2022 17:59:19 +0700 Subject: [PATCH 1/2] DEPR: Deprecate mangle_dupe_cols --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/io/excel/_base.py | 7 +++++++ pandas/io/parsers/readers.py | 7 +++++++ pandas/tests/io/excel/test_writers.py | 18 +++++++++++++----- pandas/tests/io/parser/test_mangle_dupes.py | 10 +++++++++- pandas/tests/io/parser/test_unsupported.py | 7 +++++-- 6 files changed, 42 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 48c808819d788..2e8dd505f92ca 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -852,6 +852,7 @@ Other Deprecations - Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_index`, use ``df = df.set_index(..., copy=False)`` instead (:issue:`48115`) - Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`). - Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`). +- Deprecated the ``mangle_dupe_cols`` argument in :func:`read_csv`, :func:`read_fwf` and :func:`read_excel`. The argument was never implemented, and a new argument where the renaming pattern can be specified will be added instead (:issue:`47718`) .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 86ff0d569d2a3..79dc5ee3789e3 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -42,6 +42,7 @@ from pandas.errors import EmptyDataError from pandas.util._decorators import ( Appender, + deprecate_kwarg, deprecate_nonkeyword_arguments, doc, ) @@ -280,6 +281,11 @@ Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than 'X'...'X'. 
Passing in False will cause data to be overwritten if there are duplicate names in the columns. + + .. deprecated:: 1.5.0 + Not implemented, and a new argument to specify the pattern for the + names of duplicated columns will be added instead + {storage_options} .. versionadded:: 1.2.0 @@ -433,6 +439,7 @@ def read_excel( @doc(storage_options=_shared_docs["storage_options"]) +@deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None) @deprecate_nonkeyword_arguments(allowed_args=["io", "sheet_name"], version="2.0") @Appender(_read_excel_doc) def read_excel( diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 03a634cf07e26..1c3d37912743b 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -39,6 +39,7 @@ ) from pandas.util._decorators import ( Appender, + deprecate_kwarg, deprecate_nonkeyword_arguments, ) from pandas.util._exceptions import find_stack_level @@ -163,6 +164,10 @@ Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than 'X'...'X'. Passing in False will cause data to be overwritten if there are duplicate names in the columns. + + .. deprecated:: 1.5.0 + Not implemented, and a new argument to specify the pattern for the + names of duplicated columns will be added instead dtype : Type name or dict of column -> type, optional Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32, 'c': 'Int64'}} @@ -846,6 +851,7 @@ def read_csv( ... +@deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None) @deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) @Appender( _doc_read_csv_and_table.format( @@ -1184,6 +1190,7 @@ def read_table( ... 
+@deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None) @deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) @Appender( _doc_read_csv_and_table.format( diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ba6366b71d854..01aeea7addf8e 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -976,9 +976,12 @@ def test_duplicated_columns(self, path): tm.assert_frame_equal(result, expected) # Explicitly, we pass in the parameter. - result = pd.read_excel( - path, sheet_name="test1", index_col=0, mangle_dupe_cols=True - ) + with tm.assert_produces_warning( + FutureWarning, match="the 'mangle_dupe_cols' keyword is deprecated" + ): + result = pd.read_excel( + path, sheet_name="test1", index_col=0, mangle_dupe_cols=True + ) tm.assert_frame_equal(result, expected) # see gh-11007, gh-10970 @@ -999,8 +1002,13 @@ def test_duplicated_columns(self, path): tm.assert_frame_equal(result, expected) msg = "Setting mangle_dupe_cols=False is not supported yet" - with pytest.raises(ValueError, match=msg): - pd.read_excel(path, sheet_name="test1", header=None, mangle_dupe_cols=False) + with tm.assert_produces_warning( + FutureWarning, match="the 'mangle_dupe_cols' keyword is deprecated" + ): + with pytest.raises(ValueError, match=msg): + pd.read_excel( + path, sheet_name="test1", header=None, mangle_dupe_cols=False + ) def test_swapped_columns(self, path): # Test for issue #5427. 
diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py index 3f7b1b5dfa19b..13b419c3390fc 100644 --- a/pandas/tests/io/parser/test_mangle_dupes.py +++ b/pandas/tests/io/parser/test_mangle_dupes.py @@ -21,7 +21,15 @@ def test_basic(all_parsers, kwargs): parser = all_parsers data = "a,a,b,b,b\n1,2,3,4,5" - result = parser.read_csv(StringIO(data), sep=",", **kwargs) + if "mangle_dupe_cols" in kwargs: + with tm.assert_produces_warning( + FutureWarning, + match="the 'mangle_dupe_cols' keyword is deprecated", + check_stacklevel=False, + ): + result = parser.read_csv(StringIO(data), sep=",", **kwargs) + else: + result = parser.read_csv(StringIO(data), sep=",", **kwargs) expected = DataFrame([[1, 2, 3, 4, 5]], columns=["a", "a.1", "b", "b.1", "b.2"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 2f28697daf9e2..578cea44a8ed6 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -37,8 +37,11 @@ def test_mangle_dupe_cols_false(self): msg = "is not supported" for engine in ("c", "python"): - with pytest.raises(ValueError, match=msg): - read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False) + with tm.assert_produces_warning( + FutureWarning, match="the 'mangle_dupe_cols' keyword is deprecated" + ): + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False) def test_c_engine(self): # see gh-6607 From 85b47b7ae55cef5402fc63282b67b50b094feae0 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 18 Aug 2022 18:46:51 +0700 Subject: [PATCH 2/2] Add read_table to whatsnew --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 2e8dd505f92ca..75b21c902ad89 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ 
b/doc/source/whatsnew/v1.5.0.rst @@ -852,7 +852,7 @@ Other Deprecations - Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_index`, use ``df = df.set_index(..., copy=False)`` instead (:issue:`48115`) - Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`). - Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`). -- Deprecated the ``mangle_dupe_cols`` argument in :func:`read_csv`, :func:`read_fwf` and :func:`read_excel`. The argument was never implemented, and a new argument where the renaming pattern can be specified will be added instead (:issue:`47718`) +- Deprecated the ``mangle_dupe_cols`` argument in :func:`read_csv`, :func:`read_fwf`, :func:`read_table` and :func:`read_excel`. Setting it to ``False`` was never implemented, and a new argument where the renaming pattern can be specified will be added instead (:issue:`47718`) .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: