Skip to content

Commit cd61b59

Browse files
authored
DEPR: squeeze argument in read_csv/read_table/read_excel (#43427)
1 parent 6e19bdc commit cd61b59

File tree

13 files changed

+106
-47
lines changed

13 files changed

+106
-47
lines changed

doc/source/user_guide/io.rst

+5
Original file line numberDiff line numberDiff line change
@@ -1208,6 +1208,10 @@ Returning Series
12081208
Using the ``squeeze`` keyword, the parser will return output with a single column
12091209
as a ``Series``:
12101210

1211+
.. deprecated:: 1.4.0
1212+
Users should append ``.squeeze("columns")`` to the DataFrame returned by
1213+
``read_csv`` instead.
1214+
12111215
.. ipython:: python
12121216
:suppress:
12131217
@@ -1217,6 +1221,7 @@ as a ``Series``:
12171221
fh.write(data)
12181222
12191223
.. ipython:: python
1224+
:okwarning:
12201225
12211226
print(open("tmp.csv").read())
12221227

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ Other Deprecations
278278
- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
279279
- Deprecated :meth:`.Styler.render` in favour of :meth:`.Styler.to_html` (:issue:`42140`)
280280
- Deprecated passing in a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
281+
- Deprecated the ``squeeze`` argument to :func:`read_csv`, :func:`read_table`, and :func:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead (:issue:`43242`).
281282

282283
.. ---------------------------------------------------------------------------
283284

pandas/io/excel/_base.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,10 @@
121121
Returns a subset of the columns according to behavior above.
122122
squeeze : bool, default False
123123
If the parsed data only contains one column then return a Series.
124+
125+
.. deprecated:: 1.4.0
126+
Append ``.squeeze("columns")`` to the call to ``read_excel`` to squeeze
127+
the data.
124128
dtype : Type name or dict of column -> type, default None
125129
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
126130
Use `object` to preserve data as stored in Excel and not interpret dtype.
@@ -337,7 +341,7 @@ def read_excel(
337341
names=None,
338342
index_col=None,
339343
usecols=None,
340-
squeeze=False,
344+
squeeze=None,
341345
dtype: DtypeArg | None = None,
342346
engine=None,
343347
converters=None,
@@ -481,7 +485,7 @@ def parse(
481485
names=None,
482486
index_col=None,
483487
usecols=None,
484-
squeeze=False,
488+
squeeze=None,
485489
dtype: DtypeArg | None = None,
486490
true_values=None,
487491
false_values=None,
@@ -1243,7 +1247,7 @@ def parse(
12431247
names=None,
12441248
index_col=None,
12451249
usecols=None,
1246-
squeeze=False,
1250+
squeeze=None,
12471251
converters=None,
12481252
true_values=None,
12491253
false_values=None,

pandas/io/parsers/base_parser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104
"chunksize": None,
105105
"verbose": False,
106106
"encoding": None,
107-
"squeeze": False,
107+
"squeeze": None,
108108
"compression": None,
109109
"mangle_dupe_cols": True,
110110
"infer_datetime_format": False,

pandas/io/parsers/c_parser_wrapper.py

-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ class CParserWrapper(ParserBase):
3333
def __init__(self, src: FilePathOrBuffer, **kwds):
3434
self.kwds = kwds
3535
kwds = kwds.copy()
36-
3736
ParserBase.__init__(self, kwds)
3837

3938
self.low_memory = kwds.pop("low_memory", False)

pandas/io/parsers/readers.py

+20-6
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
Appender,
2929
deprecate_nonkeyword_arguments,
3030
)
31+
from pandas.util._exceptions import find_stack_level
3132
from pandas.util._validators import validate_bool_kwarg
3233

3334
from pandas.core.dtypes.common import (
@@ -131,6 +132,10 @@
131132
parsing time and lower memory usage.
132133
squeeze : bool, default False
133134
If the parsed data only contains one column then return a Series.
135+
136+
.. deprecated:: 1.4.0
137+
Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze
138+
the data.
134139
prefix : str, optional
135140
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
136141
mangle_dupe_cols : bool, default True
@@ -439,7 +444,11 @@
439444
"low_memory",
440445
}
441446

442-
_deprecated_defaults: dict[str, Any] = {"error_bad_lines": None, "warn_bad_lines": None}
447+
_deprecated_defaults: dict[str, Any] = {
448+
"error_bad_lines": None,
449+
"warn_bad_lines": None,
450+
"squeeze": None,
451+
}
443452

444453

445454
def validate_integer(name, val, min_val=0):
@@ -552,7 +561,7 @@ def read_csv(
552561
names=lib.no_default,
553562
index_col=None,
554563
usecols=None,
555-
squeeze=False,
564+
squeeze=None,
556565
prefix=lib.no_default,
557566
mangle_dupe_cols=True,
558567
# General Parsing Configuration
@@ -650,7 +659,7 @@ def read_table(
650659
names=lib.no_default,
651660
index_col=None,
652661
usecols=None,
653-
squeeze=False,
662+
squeeze=None,
654663
prefix=lib.no_default,
655664
mangle_dupe_cols=True,
656665
# General Parsing Configuration
@@ -867,11 +876,12 @@ def __init__(self, f, engine=None, **kwds):
867876

868877
self.chunksize = options.pop("chunksize", None)
869878
self.nrows = options.pop("nrows", None)
870-
self.squeeze = options.pop("squeeze", False)
871879

872880
self._check_file_or_buffer(f, engine)
873881
self.options, self.engine = self._clean_options(options, engine)
874882

883+
self.squeeze = self.options.pop("squeeze", False)
884+
875885
if "has_index_names" in kwds:
876886
self.options["has_index_names"] = kwds["has_index_names"]
877887

@@ -1050,7 +1060,7 @@ def _clean_options(self, options, engine):
10501060
f"The {arg} argument has been deprecated and will be "
10511061
"removed in a future version.\n\n"
10521062
)
1053-
warnings.warn(msg, FutureWarning, stacklevel=7)
1063+
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
10541064
else:
10551065
result[arg] = parser_default
10561066

@@ -1100,6 +1110,10 @@ def _clean_options(self, options, engine):
11001110
result["na_values"] = na_values
11011111
result["na_fvalues"] = na_fvalues
11021112
result["skiprows"] = skiprows
1113+
# Default for squeeze is None since we need to check
1114+
# if user sets it. We then set to False to preserve
1115+
# previous behavior.
1116+
result["squeeze"] = False if options["squeeze"] is None else options["squeeze"]
11031117

11041118
return result, engine
11051119

@@ -1149,7 +1163,7 @@ def read(self, nrows=None):
11491163
self._currow += new_rows
11501164

11511165
if self.squeeze and len(df.columns) == 1:
1152-
return df[df.columns[0]].copy()
1166+
return df.squeeze("columns").copy()
11531167
return df
11541168

11551169
def get_chunk(self, size=None):

pandas/tests/frame/methods/test_to_csv.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1039,8 +1039,7 @@ def test_to_csv_compression(self, df, encoding, compression):
10391039
compression=compression,
10401040
encoding=encoding,
10411041
index_col=0,
1042-
squeeze=True,
1043-
)
1042+
).squeeze("columns")
10441043
tm.assert_frame_equal(df, result)
10451044

10461045
# explicitly make sure file is compressed

pandas/tests/io/excel/test_readers.py

+17-10
Original file line numberDiff line numberDiff line change
@@ -1194,18 +1194,25 @@ def test_read_excel_squeeze(self, read_ext):
11941194
# GH 12157
11951195
f = "test_squeeze" + read_ext
11961196

1197-
actual = pd.read_excel(f, sheet_name="two_columns", index_col=0, squeeze=True)
1198-
expected = Series([2, 3, 4], [4, 5, 6], name="b")
1199-
expected.index.name = "a"
1200-
tm.assert_series_equal(actual, expected)
1197+
with tm.assert_produces_warning(
1198+
FutureWarning,
1199+
match="The squeeze argument has been deprecated "
1200+
"and will be removed in a future version.\n\n",
1201+
):
1202+
actual = pd.read_excel(
1203+
f, sheet_name="two_columns", index_col=0, squeeze=True
1204+
)
1205+
expected = Series([2, 3, 4], [4, 5, 6], name="b")
1206+
expected.index.name = "a"
1207+
tm.assert_series_equal(actual, expected)
12011208

1202-
actual = pd.read_excel(f, sheet_name="two_columns", squeeze=True)
1203-
expected = DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]})
1204-
tm.assert_frame_equal(actual, expected)
1209+
actual = pd.read_excel(f, sheet_name="two_columns", squeeze=True)
1210+
expected = DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]})
1211+
tm.assert_frame_equal(actual, expected)
12051212

1206-
actual = pd.read_excel(f, sheet_name="one_column", squeeze=True)
1207-
expected = Series([1, 2, 3], name="a")
1208-
tm.assert_series_equal(actual, expected)
1213+
actual = pd.read_excel(f, sheet_name="one_column", squeeze=True)
1214+
expected = Series([1, 2, 3], name="a")
1215+
tm.assert_series_equal(actual, expected)
12091216

12101217
def test_deprecated_kwargs(self, read_ext):
12111218
with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False):

pandas/tests/io/parser/common/test_common_basic.py

+25-11
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ def test_1000_sep(all_parsers):
128128
tm.assert_frame_equal(result, expected)
129129

130130

131-
def test_squeeze(all_parsers):
131+
@pytest.mark.parametrize("squeeze", [True, False])
132+
def test_squeeze(all_parsers, squeeze):
132133
data = """\
133134
a,1
134135
b,2
@@ -138,13 +139,25 @@ def test_squeeze(all_parsers):
138139
index = Index(["a", "b", "c"], name=0)
139140
expected = Series([1, 2, 3], name=1, index=index)
140141

141-
result = parser.read_csv(StringIO(data), index_col=0, header=None, squeeze=True)
142-
tm.assert_series_equal(result, expected)
142+
result = parser.read_csv_check_warnings(
143+
FutureWarning,
144+
"The squeeze argument has been deprecated "
145+
"and will be removed in a future version.\n\n",
146+
StringIO(data),
147+
index_col=0,
148+
header=None,
149+
squeeze=squeeze,
150+
)
151+
if not squeeze:
152+
expected = DataFrame(expected)
153+
tm.assert_frame_equal(result, expected)
154+
else:
155+
tm.assert_series_equal(result, expected)
143156

144-
# see gh-8217
145-
#
146-
# Series should not be a view.
147-
assert not result._is_view
157+
# see gh-8217
158+
#
159+
# Series should not be a view.
160+
assert not result._is_view
148161

149162

150163
@xfail_pyarrow
@@ -847,12 +860,13 @@ def test_deprecated_bad_lines_warns(all_parsers, csv1, on_bad_lines):
847860
# GH 15122
848861
parser = all_parsers
849862
kwds = {f"{on_bad_lines}_bad_lines": False}
850-
with tm.assert_produces_warning(
863+
parser.read_csv_check_warnings(
851864
FutureWarning,
852-
match=f"The {on_bad_lines}_bad_lines argument has been deprecated "
865+
f"The {on_bad_lines}_bad_lines argument has been deprecated "
853866
"and will be removed in a future version.\n\n",
854-
):
855-
parser.read_csv(csv1, **kwds)
867+
csv1,
868+
**kwds,
869+
)
856870

857871

858872
def test_malformed_second_line(all_parsers):

pandas/tests/io/parser/common/test_iterator.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from pandas import (
1010
DataFrame,
11-
Series,
1211
concat,
1312
)
1413
import pandas._testing as tm
@@ -94,7 +93,7 @@ def test_iterator_skipfooter_errors(all_parsers, kwargs):
9493

9594
def test_iteration_open_handle(all_parsers):
9695
parser = all_parsers
97-
kwargs = {"squeeze": True, "header": None}
96+
kwargs = {"header": None}
9897

9998
with tm.ensure_clean() as path:
10099
with open(path, "w") as f:
@@ -106,5 +105,5 @@ def test_iteration_open_handle(all_parsers):
106105
break
107106

108107
result = parser.read_csv(f, **kwargs)
109-
expected = Series(["DDD", "EEE", "FFF", "GGG"], name=0)
110-
tm.assert_series_equal(result, expected)
108+
expected = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]})
109+
tm.assert_frame_equal(result, expected)

pandas/tests/io/parser/conftest.py

+11
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
read_csv,
1111
read_table,
1212
)
13+
import pandas._testing as tm
1314

1415

1516
class BaseParser:
@@ -27,6 +28,16 @@ def read_csv(self, *args, **kwargs):
2728
kwargs = self.update_kwargs(kwargs)
2829
return read_csv(*args, **kwargs)
2930

31+
def read_csv_check_warnings(
32+
self, warn_type: type[Warning], warn_msg: str, *args, **kwargs
33+
):
34+
# We need to check the stacklevel here instead of in the tests
35+
# since this is where read_csv is called and where the warning
36+
# should point to.
37+
kwargs = self.update_kwargs(kwargs)
38+
with tm.assert_produces_warning(warn_type, match=warn_msg):
39+
return read_csv(*args, **kwargs)
40+
3041
def read_table(self, *args, **kwargs):
3142
kwargs = self.update_kwargs(kwargs)
3243
return read_table(*args, **kwargs)

pandas/tests/io/test_compression.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,14 @@ def test_series_compression_defaults_to_infer(
9595
extension = icom._compression_to_extension[compression_only]
9696
with tm.ensure_clean("compressed" + extension) as path:
9797
getattr(input, write_method)(path, **write_kwargs)
98-
output = read_method(path, compression=compression_only, **read_kwargs)
98+
if "squeeze" in read_kwargs:
99+
kwargs = read_kwargs.copy()
100+
del kwargs["squeeze"]
101+
output = read_method(path, compression=compression_only, **kwargs).squeeze(
102+
"columns"
103+
)
104+
else:
105+
output = read_method(path, compression=compression_only, **read_kwargs)
99106
tm.assert_series_equal(output, input, check_names=False)
100107

101108

pandas/tests/series/methods/test_to_csv.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313

1414
class TestSeriesToCSV:
1515
def read_csv(self, path, **kwargs):
16-
params = {"squeeze": True, "index_col": 0, "header": None, "parse_dates": True}
16+
params = {"index_col": 0, "header": None, "parse_dates": True}
1717
params.update(**kwargs)
1818

1919
header = params.get("header")
20-
out = pd.read_csv(path, **params)
20+
out = pd.read_csv(path, **params).squeeze("columns")
2121

2222
if header is None:
2323
out.name = out.index.name = None
@@ -138,8 +138,7 @@ def test_to_csv_compression(self, s, encoding, compression):
138138
compression=compression,
139139
encoding=encoding,
140140
index_col=0,
141-
squeeze=True,
142-
)
141+
).squeeze("columns")
143142
tm.assert_series_equal(s, result)
144143

145144
# test the round trip using file handle - to_csv -> read_csv
@@ -153,8 +152,7 @@ def test_to_csv_compression(self, s, encoding, compression):
153152
compression=compression,
154153
encoding=encoding,
155154
index_col=0,
156-
squeeze=True,
157-
)
155+
).squeeze("columns")
158156
tm.assert_series_equal(s, result)
159157

160158
# explicitly ensure file was compressed
@@ -164,7 +162,8 @@ def test_to_csv_compression(self, s, encoding, compression):
164162

165163
with tm.decompress_file(filename, compression) as fh:
166164
tm.assert_series_equal(
167-
s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding)
165+
s,
166+
pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"),
168167
)
169168

170169
def test_to_csv_interval_index(self):
@@ -173,7 +172,7 @@ def test_to_csv_interval_index(self):
173172

174173
with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
175174
s.to_csv(path, header=False)
176-
result = self.read_csv(path, index_col=0, squeeze=True)
175+
result = self.read_csv(path, index_col=0)
177176

178177
# can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
179178
expected = s.copy()

0 commit comments

Comments
 (0)