From 37261cef1443fee7c3bd1754ec495ab5e79c0fe2 Mon Sep 17 00:00:00 2001 From: Thomas Li Date: Mon, 6 Sep 2021 10:12:40 -0700 Subject: [PATCH 1/9] DEPR: squeeze argument in read_csv/read_table/read_excel --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/io/excel/_base.py | 77 +++++++++++-------- pandas/io/parsers/base_parser.py | 2 +- pandas/io/parsers/c_parser_wrapper.py | 1 - pandas/io/parsers/readers.py | 23 ++++-- pandas/tests/io/excel/test_readers.py | 27 ++++--- .../io/parser/common/test_common_basic.py | 26 +++++-- 7 files changed, 100 insertions(+), 57 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 3b9ddf8138689..de20c200ba1bc 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -275,6 +275,7 @@ Other Deprecations - Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`) - Deprecated :meth:`.Styler.render` in favour of :meth:`.Styler.to_html` (:issue:`42140`) - Deprecated passing in a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`) +- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 52d1e1c83d3e6..b1a2a3824efc9 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -121,6 +121,9 @@ Returns a subset of the columns according to behavior above. squeeze : bool, default False If the parsed data only contains one column then return a Series. + .. deprecated:: 1.4.0 + Append ``.squeeze("columns")`` to the call to ``read_excel`` to squeeze + the data. dtype : Type name or dict of column -> type, default None Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} Use `object` to preserve data as stored in Excel and not interpret dtype. @@ -337,7 +340,7 @@ def read_excel( names=None, index_col=None, usecols=None, - squeeze=False, + squeeze=None, dtype: DtypeArg | None = None, engine=None, converters=None, @@ -481,7 +484,7 @@ def parse( names=None, index_col=None, usecols=None, - squeeze=False, + squeeze=None, dtype: DtypeArg | None = None, true_values=None, false_values=None, @@ -598,41 +601,49 @@ def parse( data[row][col] = last else: last = data[row][col] - + future_warnings = [] # GH 12292 : error when read one empty column from excel file try: - parser = TextParser( - data, - names=names, - header=header, - index_col=index_col, - has_index_names=has_index_names, - squeeze=squeeze, - dtype=dtype, - true_values=true_values, - false_values=false_values, - skiprows=skiprows, - nrows=nrows, - na_values=na_values, - skip_blank_lines=False, # GH 39808 - parse_dates=parse_dates, - date_parser=date_parser, - thousands=thousands, - comment=comment, - skipfooter=skipfooter, - usecols=usecols, - mangle_dupe_cols=mangle_dupe_cols, - **kwds, - ) + # Gotta catch deprecation warnings to raise at correct stacklevel :( + with warnings.catch_warnings(record=True) as w: + parser = TextParser( + data, + names=names, + header=header, + index_col=index_col, + has_index_names=has_index_names, + squeeze=squeeze, + dtype=dtype, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + skip_blank_lines=False, # GH 39808 + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + usecols=usecols, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) - output[asheetname] = parser.read(nrows=nrows) + output[asheetname] = parser.read(nrows=nrows) - if not squeeze or isinstance(output[asheetname], DataFrame): - if header_names: - output[asheetname].columns = output[ - asheetname - ].columns.set_names(header_names) + if not squeeze or isinstance(output[asheetname], DataFrame): + if header_names: + output[asheetname].columns = output[ + asheetname + ].columns.set_names(header_names) + # Record warning messages, can't raise here since it would be + # suppressed again + for warning in w: + future_warnings.append(str(warning.message)) + for warning in future_warnings: + warnings.warn(warning, FutureWarning, stacklevel=5) except EmptyDataError: # No Data, return an empty DataFrame output[asheetname] = DataFrame() @@ -1243,7 +1254,7 @@ def parse( names=None, index_col=None, usecols=None, - squeeze=False, + squeeze=None, converters=None, true_values=None, false_values=None, diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index c7516a2df50f6..983f7b6a20a48 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -104,7 +104,7 @@ "chunksize": None, "verbose": False, "encoding": None, - "squeeze": False, + "squeeze": None, "compression": None, "mangle_dupe_cols": True, "infer_datetime_format": False, diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 57ae61548d8d0..7998fe57b58c8 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -33,7 +33,6 @@ class CParserWrapper(ParserBase): def __init__(self, src: FilePathOrBuffer, **kwds): self.kwds = kwds kwds = kwds.copy() - ParserBase.__init__(self, kwds) self.low_memory = kwds.pop("low_memory", False) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index b4a7a4f5e9325..0782da49dc939 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -131,6 +131,10 @@ parsing time and lower memory usage. squeeze : bool, default False If the parsed data only contains one column then return a Series. + + .. deprecated:: 1.4.0 + Append ``.squeeze("columns")`` to the call to ``read_csv`` to squeeze + the data. prefix : str, optional Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... mangle_dupe_cols : bool, default True @@ -439,7 +443,11 @@ "low_memory", } -_deprecated_defaults: dict[str, Any] = {"error_bad_lines": None, "warn_bad_lines": None} +_deprecated_defaults: dict[str, Any] = { + "error_bad_lines": None, + "warn_bad_lines": None, + "squeeze": None, +} def validate_integer(name, val, min_val=0): @@ -552,7 +560,7 @@ def read_csv( names=lib.no_default, index_col=None, usecols=None, - squeeze=False, + squeeze=None, prefix=lib.no_default, mangle_dupe_cols=True, # General Parsing Configuration @@ -650,7 +658,7 @@ def read_table( names=lib.no_default, index_col=None, usecols=None, - squeeze=False, + squeeze=None, prefix=lib.no_default, mangle_dupe_cols=True, # General Parsing Configuration @@ -867,11 +875,12 @@ def __init__(self, f, engine=None, **kwds): self.chunksize = options.pop("chunksize", None) self.nrows = options.pop("nrows", None) - self.squeeze = options.pop("squeeze", False) self._check_file_or_buffer(f, engine) self.options, self.engine = self._clean_options(options, engine) + self.squeeze = self.options.pop("squeeze", False) + if "has_index_names" in kwds: self.options["has_index_names"] = kwds["has_index_names"] @@ -1100,6 +1109,10 @@ def _clean_options(self, options, engine): result["na_values"] = na_values result["na_fvalues"] = na_fvalues result["skiprows"] = skiprows + # Default for squeeze is none since we need to check + # if user sets it, we set to False since behavior is + # equivlent + result["squeeze"] = False if options["squeeze"] is None else options["squeeze"] return result, engine @@ -1149,7 +1162,7 @@ def read(self, nrows=None): self._currow += new_rows if self.squeeze and len(df.columns) == 1: - return df[df.columns[0]].copy() + return df.squeeze("columns").copy() return df def get_chunk(self, size=None): diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index f999733192725..657e64bd01809 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1194,18 +1194,25 @@ def test_read_excel_squeeze(self, read_ext): # GH 12157 f = "test_squeeze" + read_ext - actual = pd.read_excel(f, sheet_name="two_columns", index_col=0, squeeze=True) - expected = Series([2, 3, 4], [4, 5, 6], name="b") - expected.index.name = "a" - tm.assert_series_equal(actual, expected) + with tm.assert_produces_warning( + FutureWarning, + match="The squeeze argument has been deprecated " + "and will be removed in a future version.\n\n", + ): + actual = pd.read_excel( + f, sheet_name="two_columns", index_col=0, squeeze=True + ) + expected = Series([2, 3, 4], [4, 5, 6], name="b") + expected.index.name = "a" + tm.assert_series_equal(actual, expected) - actual = pd.read_excel(f, sheet_name="two_columns", squeeze=True) - expected = DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]}) - tm.assert_frame_equal(actual, expected) + actual = pd.read_excel(f, sheet_name="two_columns", squeeze=True) + expected = DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]}) + tm.assert_frame_equal(actual, expected) - actual = pd.read_excel(f, sheet_name="one_column", squeeze=True) - expected = Series([1, 2, 3], name="a") - tm.assert_series_equal(actual, expected) + actual = pd.read_excel(f, sheet_name="one_column", squeeze=True) + expected = Series([1, 2, 3], name="a") + tm.assert_series_equal(actual, expected) def test_deprecated_kwargs(self, read_ext): with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False): diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 841df0ea7e470..8d517f04f3219 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -128,7 +128,8 @@ def test_1000_sep(all_parsers): tm.assert_frame_equal(result, expected) -def test_squeeze(all_parsers): +@pytest.mark.parametrize("squeeze", [True, False]) +def test_squeeze(all_parsers, squeeze): data = """\ a,1 b,2 @@ -138,13 +139,24 @@ def test_squeeze(all_parsers): index = Index(["a", "b", "c"], name=0) expected = Series([1, 2, 3], name=1, index=index) - result = parser.read_csv(StringIO(data), index_col=0, header=None, squeeze=True) - tm.assert_series_equal(result, expected) + with tm.assert_produces_warning( + FutureWarning, + match="The squeeze argument has been deprecated " + "and will be removed in a future version.\n\n", + ): + result = parser.read_csv( + StringIO(data), index_col=0, header=None, squeeze=squeeze + ) + if not squeeze: + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + else: + tm.assert_series_equal(result, expected) - # see gh-8217 - # - # Series should not be a view. - assert not result._is_view + # see gh-8217 + # + # Series should not be a view. + assert not result._is_view @xfail_pyarrow From 41ce64a5e82579d93bed4c6a3902675181643588 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Mon, 6 Sep 2021 11:53:23 -0700 Subject: [PATCH 2/9] doc --- doc/source/user_guide/io.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 4c7b13bcf989f..ab9dec2365971 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1208,6 +1208,10 @@ Returning Series Using the ``squeeze`` keyword, the parser will return output with a single column as a ``Series``: +.. deprecated:: 1.4.0 + Users should append ``.squeeze("columns")`` to the DataFrame returned by + ``read_csv`` instead. + .. ipython:: python :suppress: From d6b977b5240d1acb4cb5da31cfc4f08537976711 Mon Sep 17 00:00:00 2001 From: Thomas Li Date: Wed, 8 Sep 2021 16:17:46 -0700 Subject: [PATCH 3/9] Fixes --- pandas/io/excel/_base.py | 68 ++++++++----------- pandas/io/parsers/readers.py | 3 +- pandas/tests/frame/methods/test_to_csv.py | 3 +- .../io/parser/common/test_common_basic.py | 40 +++++------ .../tests/io/parser/common/test_iterator.py | 4 +- pandas/tests/io/parser/conftest.py | 11 +++ pandas/tests/io/test_compression.py | 6 +- pandas/tests/series/methods/test_to_csv.py | 15 ++-- 8 files changed, 79 insertions(+), 71 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index b1a2a3824efc9..8284229aa158d 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -601,49 +601,41 @@ def parse( data[row][col] = last else: last = data[row][col] - future_warnings = [] + # GH 12292 : error when read one empty column from excel file try: - # Gotta catch deprecation warnings to raise at correct stacklevel :( - with warnings.catch_warnings(record=True) as w: - parser = TextParser( - data, - names=names, - header=header, - index_col=index_col, - has_index_names=has_index_names, - squeeze=squeeze, - dtype=dtype, - true_values=true_values, - false_values=false_values, - skiprows=skiprows, - nrows=nrows, - na_values=na_values, - skip_blank_lines=False, # GH 39808 - parse_dates=parse_dates, - date_parser=date_parser, - thousands=thousands, - comment=comment, - skipfooter=skipfooter, - usecols=usecols, - mangle_dupe_cols=mangle_dupe_cols, - **kwds, - ) + parser = TextParser( + data, + names=names, + header=header, + index_col=index_col, + has_index_names=has_index_names, + squeeze=squeeze, + dtype=dtype, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + skip_blank_lines=False, # GH 39808 + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + usecols=usecols, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) - output[asheetname] = parser.read(nrows=nrows) + output[asheetname] = parser.read(nrows=nrows) - if not squeeze or isinstance(output[asheetname], DataFrame): - if header_names: - output[asheetname].columns = output[ - asheetname - ].columns.set_names(header_names) + if not squeeze or isinstance(output[asheetname], DataFrame): + if header_names: + output[asheetname].columns = output[ + asheetname + ].columns.set_names(header_names) - # Record warning messages, can't raise here since it would be - # suppressed again - for warning in w: - future_warnings.append(str(warning.message)) - for warning in future_warnings: - warnings.warn(warning, FutureWarning, stacklevel=5) except EmptyDataError: # No Data, return an empty DataFrame output[asheetname] = DataFrame() diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 0782da49dc939..ec58e11aa71db 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -28,6 +28,7 @@ Appender, deprecate_nonkeyword_arguments, ) +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( @@ -1059,7 +1060,7 @@ def _clean_options(self, options, engine): f"The {arg} argument has been deprecated and will be " "removed in a future version.\n\n" ) - warnings.warn(msg, FutureWarning, stacklevel=7) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) else: result[arg] = parser_default diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 5156d0371e9b7..be84eb9c85663 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -1039,8 +1039,7 @@ def test_to_csv_compression(self, df, encoding, compression): compression=compression, encoding=encoding, index_col=0, - squeeze=True, - ) + ).squeeze("columns") tm.assert_frame_equal(df, result) # explicitly make sure file is compressed diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 8d517f04f3219..3e8e70e7f94a9 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -139,24 +139,25 @@ def test_squeeze(all_parsers, squeeze): index = Index(["a", "b", "c"], name=0) expected = Series([1, 2, 3], name=1, index=index) - with tm.assert_produces_warning( + result = parser.read_csv_check_warnings( FutureWarning, - match="The squeeze argument has been deprecated " + "The squeeze argument has been deprecated " "and will be removed in a future version.\n\n", - ): - result = parser.read_csv( - StringIO(data), index_col=0, header=None, squeeze=squeeze - ) - if not squeeze: - expected = DataFrame(expected) - tm.assert_frame_equal(result, expected) - else: - tm.assert_series_equal(result, expected) + StringIO(data), + index_col=0, + header=None, + squeeze=squeeze, + ) + if not squeeze: + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + else: + tm.assert_series_equal(result, expected) - # see gh-8217 - # - # Series should not be a view. - assert not result._is_view + # see gh-8217 + # + # Series should not be a view. + assert not result._is_view @xfail_pyarrow @@ -859,12 +860,13 @@ def test_deprecated_bad_lines_warns(all_parsers, csv1, on_bad_lines): # GH 15122 parser = all_parsers kwds = {f"{on_bad_lines}_bad_lines": False} - with tm.assert_produces_warning( + parser.read_csv_check_warnings( FutureWarning, - match=f"The {on_bad_lines}_bad_lines argument has been deprecated " + f"The {on_bad_lines}_bad_lines argument has been deprecated " "and will be removed in a future version.\n\n", - ): - parser.read_csv(csv1, **kwds) + csv1, + **kwds, + ) def test_malformed_second_line(all_parsers): diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py index 06ce0687e513a..3ffa10b73848b 100644 --- a/pandas/tests/io/parser/common/test_iterator.py +++ b/pandas/tests/io/parser/common/test_iterator.py @@ -94,7 +94,7 @@ def test_iterator_skipfooter_errors(all_parsers, kwargs): def test_iteration_open_handle(all_parsers): parser = all_parsers - kwargs = {"squeeze": True, "header": None} + kwargs = {"header": None} with tm.ensure_clean() as path: with open(path, "w") as f: @@ -105,6 +105,6 @@ def test_iteration_open_handle(all_parsers): if "CCC" in line: break - result = parser.read_csv(f, **kwargs) + result = parser.read_csv(f, **kwargs).squeeze("columns") expected = Series(["DDD", "EEE", "FFF", "GGG"], name=0) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 372034e552b25..d5fb467e5b01d 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -10,6 +10,7 @@ read_csv, read_table, ) +import pandas._testing as tm class BaseParser: @@ -27,6 +28,16 @@ def read_csv(self, *args, **kwargs): kwargs = self.update_kwargs(kwargs) return read_csv(*args, **kwargs) + def read_csv_check_warnings( + self, warn_type: Warning, warn_msg: str, *args, **kwargs + ): + # We need to check the stacklevel here instead of in the tests + # since this is where read_csv is called and where the warning + # should point to. + kwargs = self.update_kwargs(kwargs) + with tm.assert_produces_warning(warn_type, match=warn_msg): + return read_csv(*args, **kwargs) + def read_table(self, *args, **kwargs): kwargs = self.update_kwargs(kwargs) return read_table(*args, **kwargs) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 6c90830639061..f6f972fcf4f6a 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -95,7 +95,11 @@ def test_series_compression_defaults_to_infer( extension = icom._compression_to_extension[compression_only] with tm.ensure_clean("compressed" + extension) as path: getattr(input, write_method)(path, **write_kwargs) - output = read_method(path, compression=compression_only, **read_kwargs) + if "squeeze" in read_kwargs: + del read_kwargs["squeeze"] + output = read_method( + path, compression=compression_only, **read_kwargs + ).squeeze("columns") tm.assert_series_equal(output, input, check_names=False) diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index 9684546112078..28519fc9b529f 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -13,11 +13,11 @@ class TestSeriesToCSV: def read_csv(self, path, **kwargs): - params = {"squeeze": True, "index_col": 0, "header": None, "parse_dates": True} + params = {"index_col": 0, "header": None, "parse_dates": True} params.update(**kwargs) header = params.get("header") - out = pd.read_csv(path, **params) + out = pd.read_csv(path, **params).squeeze("columns") if header is None: out.name = out.index.name = None @@ -138,8 +138,7 @@ def test_to_csv_compression(self, s, encoding, compression): compression=compression, encoding=encoding, index_col=0, - squeeze=True, - ) + ).squeeze("columns") tm.assert_series_equal(s, result) # test the round trip using file handle - to_csv -> read_csv @@ -153,8 +152,7 @@ def test_to_csv_compression(self, s, encoding, compression): compression=compression, encoding=encoding, index_col=0, - squeeze=True, - ) + ).squeeze("columns") tm.assert_series_equal(s, result) # explicitly ensure file was compressed @@ -164,7 +162,8 @@ def test_to_csv_compression(self, s, encoding, compression): with tm.decompress_file(filename, compression) as fh: tm.assert_series_equal( - s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding) + s, + pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"), ) def test_to_csv_interval_index(self): @@ -173,7 +172,7 @@ def test_to_csv_interval_index(self): with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path: s.to_csv(path, header=False) - result = self.read_csv(path, index_col=0, squeeze=True) + result = self.read_csv(path, index_col=0) # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) expected = s.copy() From a74ade6ba9ce822023353ecd528d043f80ab5edd Mon Sep 17 00:00:00 2001 From: Thomas Li Date: Wed, 8 Sep 2021 16:54:47 -0700 Subject: [PATCH 4/9] more fixes --- pandas/tests/io/test_compression.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index f6f972fcf4f6a..3c278cb48e20f 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -96,10 +96,13 @@ def test_series_compression_defaults_to_infer( with tm.ensure_clean("compressed" + extension) as path: getattr(input, write_method)(path, **write_kwargs) if "squeeze" in read_kwargs: - del read_kwargs["squeeze"] - output = read_method( - path, compression=compression_only, **read_kwargs - ).squeeze("columns") + kwargs = read_kwargs.copy() + del kwargs["squeeze"] + output = read_method(path, compression=compression_only, **kwargs).squeeze( + "columns" + ) + else: + output = read_method(path, compression=compression_only, **read_kwargs) tm.assert_series_equal(output, input, check_names=False) From bbdeb912c646977bf2db75e33c9696932bf234b0 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 8 Sep 2021 17:37:53 -0700 Subject: [PATCH 5/9] Update io.rst --- doc/source/user_guide/io.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index ab9dec2365971..c69f17512a8ea 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1221,6 +1221,7 @@ as a ``Series``: fh.write(data) .. ipython:: python + :okwarning: print(open("tmp.csv").read()) From 71ccbb1bad6fc7f7e0d8db56601c3124c76497eb Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 8 Sep 2021 19:29:25 -0700 Subject: [PATCH 6/9] Update conftest.py --- pandas/tests/io/parser/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index d5fb467e5b01d..efef06ef28574 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -29,7 +29,7 @@ def read_csv(self, *args, **kwargs): return read_csv(*args, **kwargs) def read_csv_check_warnings( - self, warn_type: Warning, warn_msg: str, *args, **kwargs + self, warn_type: type[Warning], warn_msg: str, *args, **kwargs ): # We need to check the stacklevel here instead of in the tests # since this is where read_csv is called and where the warning From da725dec78ee47a8ffc3b702674b8a27ad559ef4 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 8 Sep 2021 20:52:25 -0700 Subject: [PATCH 7/9] Update _base.py --- pandas/io/excel/_base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 8284229aa158d..51df814ff0ece 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -121,9 +121,10 @@ Returns a subset of the columns according to behavior above. squeeze : bool, default False If the parsed data only contains one column then return a Series. + .. deprecated:: 1.4.0 - Append ``.squeeze("columns")`` to the call to ``read_excel`` to squeeze - the data. + Append ``.squeeze("columns")`` to the call to ``read_excel`` to squeeze + the data. dtype : Type name or dict of column -> type, default None Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} Use `object` to preserve data as stored in Excel and not interpret dtype. From 5a7d25a1779bab05ef50f37be44ccd83124511a2 Mon Sep 17 00:00:00 2001 From: Thomas Li Date: Thu, 9 Sep 2021 18:11:55 -0700 Subject: [PATCH 8/9] Changes from code review --- pandas/io/parsers/readers.py | 4 ++-- pandas/tests/io/parser/common/test_iterator.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index ec58e11aa71db..439ae3f32c972 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1111,8 +1111,8 @@ def _clean_options(self, options, engine): result["na_fvalues"] = na_fvalues result["skiprows"] = skiprows # Default for squeeze is none since we need to check - # if user sets it, we set to False since behavior is - # equivlent + # if user sets it. We then set to False to preserve + # previous behavior. result["squeeze"] = False if options["squeeze"] is None else options["squeeze"] return result, engine diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py index 3ffa10b73848b..5966a2fd6e095 100644 --- a/pandas/tests/io/parser/common/test_iterator.py +++ b/pandas/tests/io/parser/common/test_iterator.py @@ -8,7 +8,6 @@ from pandas import ( DataFrame, - Series, concat, ) import pandas._testing as tm @@ -105,6 +104,6 @@ def test_iteration_open_handle(all_parsers): if "CCC" in line: break - result = parser.read_csv(f, **kwargs).squeeze("columns") - expected = Series(["DDD", "EEE", "FFF", "GGG"], name=0) - tm.assert_series_equal(result, expected) + result = parser.read_csv(f, **kwargs) + expected = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]}) + tm.assert_frame_equal(result, expected) From 71d4f7a27259013387bad5fe741eaf8fbccf9fb6 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 9 Sep 2021 20:06:39 -0700 Subject: [PATCH 9/9] Update pandas/io/parsers/readers.py Co-authored-by: gfyoung --- pandas/io/parsers/readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 439ae3f32c972..afbe37b2ef44c 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -134,7 +134,7 @@ If the parsed data only contains one column then return a Series. .. deprecated:: 1.4.0 - Append ``.squeeze("columns")`` to the call to ``read_csv`` to squeeze + Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze the data. prefix : str, optional Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...