Skip to content

Commit 4ac340e

Browse files
DEPR: delim_whitespace kwd in read_csv (#56557)
* DEPR: delim_whitespace kwd in read_csv * Update doc/source/whatsnew/v2.2.0.rst Co-authored-by: Matthew Roeschke <[email protected]> --------- Co-authored-by: Matthew Roeschke <[email protected]>
1 parent fca991b commit 4ac340e

File tree

11 files changed

+162
-43
lines changed

11 files changed

+162
-43
lines changed

doc/source/user_guide/io.rst

+3
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ delim_whitespace : boolean, default False
8181
If this option is set to ``True``, nothing should be passed in for the
8282
``delimiter`` parameter.
8383

84+
.. deprecated:: 2.2.0
85+
Use ``sep="\\s+"`` instead.
86+
8487
Column and index locations and names
8588
++++++++++++++++++++++++++++++++++++
8689

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ Other Deprecations
484484
- Deprecated support for combining parsed datetime columns in :func:`read_csv` along with the ``keep_date_col`` keyword (:issue:`55569`)
485485
- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`SeriesGroupBy.grouper`; these attributes will be removed in a future version of pandas (:issue:`56521`)
486486
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
487+
- Deprecated the ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep="\\s+"`` instead (:issue:`55569`)
487488
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
488489
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
489490
- Deprecated the ``kind`` keyword in :meth:`Series.resample` and :meth:`DataFrame.resample`, explicitly cast the object's ``index`` instead (:issue:`55895`)

pandas/io/parsers/readers.py

+34-6
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,9 @@
403403
used as the ``sep`` delimiter. Equivalent to setting ``sep='\\s+'``. If this option
404404
is set to ``True``, nothing should be passed in for the ``delimiter``
405405
parameter.
406+
407+
.. deprecated:: 2.2.0
408+
Use ``sep="\\s+"`` instead.
406409
low_memory : bool, default True
407410
Internally process the file in chunks, resulting in lower memory use
408411
while parsing, but possibly mixed type inference. To ensure no mixed
@@ -670,7 +673,7 @@ def read_csv(
670673
encoding_errors: str | None = ...,
671674
dialect: str | csv.Dialect | None = ...,
672675
on_bad_lines=...,
673-
delim_whitespace: bool = ...,
676+
delim_whitespace: bool | lib.NoDefault = ...,
674677
low_memory: bool = ...,
675678
memory_map: bool = ...,
676679
float_precision: Literal["high", "legacy"] | None = ...,
@@ -730,7 +733,7 @@ def read_csv(
730733
encoding_errors: str | None = ...,
731734
dialect: str | csv.Dialect | None = ...,
732735
on_bad_lines=...,
733-
delim_whitespace: bool = ...,
736+
delim_whitespace: bool | lib.NoDefault = ...,
734737
low_memory: bool = ...,
735738
memory_map: bool = ...,
736739
float_precision: Literal["high", "legacy"] | None = ...,
@@ -790,7 +793,7 @@ def read_csv(
790793
encoding_errors: str | None = ...,
791794
dialect: str | csv.Dialect | None = ...,
792795
on_bad_lines=...,
793-
delim_whitespace: bool = ...,
796+
delim_whitespace: bool | lib.NoDefault = ...,
794797
low_memory: bool = ...,
795798
memory_map: bool = ...,
796799
float_precision: Literal["high", "legacy"] | None = ...,
@@ -850,7 +853,7 @@ def read_csv(
850853
encoding_errors: str | None = ...,
851854
dialect: str | csv.Dialect | None = ...,
852855
on_bad_lines=...,
853-
delim_whitespace: bool = ...,
856+
delim_whitespace: bool | lib.NoDefault = ...,
854857
low_memory: bool = ...,
855858
memory_map: bool = ...,
856859
float_precision: Literal["high", "legacy"] | None = ...,
@@ -928,7 +931,7 @@ def read_csv(
928931
# Error Handling
929932
on_bad_lines: str = "error",
930933
# Internal
931-
delim_whitespace: bool = False,
934+
delim_whitespace: bool | lib.NoDefault = lib.no_default,
932935
low_memory: bool = _c_parser_defaults["low_memory"],
933936
memory_map: bool = False,
934937
float_precision: Literal["high", "legacy"] | None = None,
@@ -978,6 +981,17 @@ def read_csv(
978981
stacklevel=find_stack_level(),
979982
)
980983

984+
if delim_whitespace is not lib.no_default:
985+
# GH#55569
986+
warnings.warn(
987+
"The 'delim_whitespace' keyword in pd.read_csv is deprecated and "
988+
"will be removed in a future version. Use ``sep='\\s+'`` instead",
989+
FutureWarning,
990+
stacklevel=find_stack_level(),
991+
)
992+
else:
993+
delim_whitespace = False
994+
981995
if verbose is not lib.no_default:
982996
# GH#55569
983997
warnings.warn(
@@ -1305,7 +1319,7 @@ def read_table(
13051319
# Error Handling
13061320
on_bad_lines: str = "error",
13071321
# Internal
1308-
delim_whitespace: bool = False,
1322+
delim_whitespace: bool | lib.NoDefault = lib.no_default,
13091323
low_memory: bool = _c_parser_defaults["low_memory"],
13101324
memory_map: bool = False,
13111325
float_precision: str | None = None,
@@ -1346,6 +1360,17 @@ def read_table(
13461360
stacklevel=find_stack_level(),
13471361
)
13481362

1363+
if delim_whitespace is not lib.no_default:
1364+
# GH#55569
1365+
warnings.warn(
1366+
"The 'delim_whitespace' keyword in pd.read_table is deprecated and "
1367+
"will be removed in a future version. Use ``sep='\\s+'`` instead",
1368+
FutureWarning,
1369+
stacklevel=find_stack_level(),
1370+
)
1371+
else:
1372+
delim_whitespace = False
1373+
13491374
if verbose is not lib.no_default:
13501375
# GH#55569
13511376
warnings.warn(
@@ -2131,6 +2156,9 @@ def _refine_defaults_read(
21312156
used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
21322157
is set to True, nothing should be passed in for the ``delimiter``
21332158
parameter.
2159+
2160+
.. deprecated:: 2.2.0
2161+
Use ``sep="\\s+"`` instead.
21342162
engine : {{'c', 'python'}}
21352163
Parser engine to use. The C engine is faster while the python engine is
21362164
currently more feature-complete.

pandas/tests/io/parser/common/test_common_basic.py

+56-19
Original file line numberDiff line numberDiff line change
@@ -500,13 +500,21 @@ def test_trailing_spaces(all_parsers, kwargs, expected):
500500
data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n" # noqa: E501
501501
parser = all_parsers
502502

503+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
504+
503505
if parser.engine == "pyarrow":
504506
msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
505507
with pytest.raises(ValueError, match=msg):
506-
parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
508+
with tm.assert_produces_warning(
509+
FutureWarning, match=depr_msg, check_stacklevel=False
510+
):
511+
parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
507512
return
508513

509-
result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
514+
with tm.assert_produces_warning(
515+
FutureWarning, match=depr_msg, check_stacklevel=False
516+
):
517+
result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
510518
tm.assert_frame_equal(result, expected)
511519

512520

@@ -515,8 +523,12 @@ def test_raise_on_sep_with_delim_whitespace(all_parsers):
515523
data = "a b c\n1 2 3"
516524
parser = all_parsers
517525

526+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
518527
with pytest.raises(ValueError, match="you can only specify one"):
519-
parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True)
528+
with tm.assert_produces_warning(
529+
FutureWarning, match=depr_msg, check_stacklevel=False
530+
):
531+
parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True)
520532

521533

522534
def test_read_filepath_or_buffer(all_parsers):
@@ -539,18 +551,27 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
539551
b\n"""
540552

541553
expected = DataFrame({"MyColumn": list("abab")})
554+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
542555

543556
if parser.engine == "pyarrow":
544557
msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
545558
with pytest.raises(ValueError, match=msg):
546-
parser.read_csv(
547-
StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
548-
)
559+
with tm.assert_produces_warning(
560+
FutureWarning, match=depr_msg, check_stacklevel=False
561+
):
562+
parser.read_csv(
563+
StringIO(data),
564+
skipinitialspace=True,
565+
delim_whitespace=delim_whitespace,
566+
)
549567
return
550568

551-
result = parser.read_csv(
552-
StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
553-
)
569+
with tm.assert_produces_warning(
570+
FutureWarning, match=depr_msg, check_stacklevel=False
571+
):
572+
result = parser.read_csv(
573+
StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
574+
)
554575
tm.assert_frame_equal(result, expected)
555576

556577

@@ -798,12 +819,20 @@ def test_read_table_delim_whitespace_default_sep(all_parsers):
798819
f = StringIO("a b c\n1 -2 -3\n4 5 6")
799820
parser = all_parsers
800821

822+
depr_msg = "The 'delim_whitespace' keyword in pd.read_table is deprecated"
823+
801824
if parser.engine == "pyarrow":
802825
msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
803826
with pytest.raises(ValueError, match=msg):
804-
parser.read_table(f, delim_whitespace=True)
827+
with tm.assert_produces_warning(
828+
FutureWarning, match=depr_msg, check_stacklevel=False
829+
):
830+
parser.read_table(f, delim_whitespace=True)
805831
return
806-
result = parser.read_table(f, delim_whitespace=True)
832+
with tm.assert_produces_warning(
833+
FutureWarning, match=depr_msg, check_stacklevel=False
834+
):
835+
result = parser.read_table(f, delim_whitespace=True)
807836
expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]})
808837
tm.assert_frame_equal(result, expected)
809838

@@ -817,11 +846,15 @@ def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter):
817846
"Specified a delimiter with both sep and "
818847
"delim_whitespace=True; you can only specify one."
819848
)
820-
with pytest.raises(ValueError, match=msg):
821-
parser.read_csv(f, delim_whitespace=True, sep=delimiter)
849+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
850+
with tm.assert_produces_warning(
851+
FutureWarning, match=depr_msg, check_stacklevel=False
852+
):
853+
with pytest.raises(ValueError, match=msg):
854+
parser.read_csv(f, delim_whitespace=True, sep=delimiter)
822855

823-
with pytest.raises(ValueError, match=msg):
824-
parser.read_csv(f, delim_whitespace=True, delimiter=delimiter)
856+
with pytest.raises(ValueError, match=msg):
857+
parser.read_csv(f, delim_whitespace=True, delimiter=delimiter)
825858

826859

827860
def test_read_csv_delimiter_and_sep_no_default(all_parsers):
@@ -858,11 +891,15 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
858891
"Specified a delimiter with both sep and "
859892
"delim_whitespace=True; you can only specify one."
860893
)
861-
with pytest.raises(ValueError, match=msg):
862-
parser.read_table(f, delim_whitespace=True, sep=delimiter)
894+
depr_msg = "The 'delim_whitespace' keyword in pd.read_table is deprecated"
895+
with tm.assert_produces_warning(
896+
FutureWarning, match=depr_msg, check_stacklevel=False
897+
):
898+
with pytest.raises(ValueError, match=msg):
899+
parser.read_table(f, delim_whitespace=True, sep=delimiter)
863900

864-
with pytest.raises(ValueError, match=msg):
865-
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
901+
with pytest.raises(ValueError, match=msg):
902+
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
866903

867904

868905
@skip_pyarrow

pandas/tests/io/parser/test_c_parser_only.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,11 @@ def test_delim_whitespace_custom_terminator(c_parser_only):
5151
data = "a b c~1 2 3~4 5 6~7 8 9"
5252
parser = c_parser_only
5353

54-
df = parser.read_csv(StringIO(data), lineterminator="~", delim_whitespace=True)
54+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
55+
with tm.assert_produces_warning(
56+
FutureWarning, match=depr_msg, check_stacklevel=False
57+
):
58+
df = parser.read_csv(StringIO(data), lineterminator="~", delim_whitespace=True)
5559
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"])
5660
tm.assert_frame_equal(df, expected)
5761

pandas/tests/io/parser/test_comment.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,12 @@ def test_line_comment(all_parsers, read_kwargs, request):
4141
#ignore this line
4242
5.,NaN,10.0
4343
"""
44+
warn = None
45+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
46+
4447
if read_kwargs.get("delim_whitespace"):
4548
data = data.replace(",", " ")
49+
warn = FutureWarning
4650
elif read_kwargs.get("lineterminator"):
4751
data = data.replace("\n", read_kwargs.get("lineterminator"))
4852

@@ -55,15 +59,22 @@ def test_line_comment(all_parsers, read_kwargs, request):
5559
else:
5660
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
5761
with pytest.raises(ValueError, match=msg):
58-
parser.read_csv(StringIO(data), **read_kwargs)
62+
with tm.assert_produces_warning(
63+
warn, match=depr_msg, check_stacklevel=False
64+
):
65+
parser.read_csv(StringIO(data), **read_kwargs)
5966
return
6067
elif parser.engine == "python" and read_kwargs.get("lineterminator"):
6168
msg = r"Custom line terminators not supported in python parser \(yet\)"
6269
with pytest.raises(ValueError, match=msg):
63-
parser.read_csv(StringIO(data), **read_kwargs)
70+
with tm.assert_produces_warning(
71+
warn, match=depr_msg, check_stacklevel=False
72+
):
73+
parser.read_csv(StringIO(data), **read_kwargs)
6474
return
6575

66-
result = parser.read_csv(StringIO(data), **read_kwargs)
76+
with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False):
77+
result = parser.read_csv(StringIO(data), **read_kwargs)
6778

6879
expected = DataFrame(
6980
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]

pandas/tests/io/parser/test_header.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,11 @@ def test_header_delim_whitespace(all_parsers):
706706
3,4
707707
"""
708708

709-
result = parser.read_csv(StringIO(data), delim_whitespace=True)
709+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
710+
with tm.assert_produces_warning(
711+
FutureWarning, match=depr_msg, check_stacklevel=False
712+
):
713+
result = parser.read_csv(StringIO(data), delim_whitespace=True)
710714
expected = DataFrame({"a,b": ["1,2", "3,4"]})
711715
tm.assert_frame_equal(result, expected)
712716

pandas/tests/io/parser/test_read_fwf.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,10 @@ def test_skiprows_inference():
602602
101.6 956.1
603603
""".strip()
604604
skiprows = 2
605-
expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True)
605+
606+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
607+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
608+
expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True)
606609

607610
result = read_fwf(StringIO(data), skiprows=skiprows)
608611
tm.assert_frame_equal(result, expected)
@@ -617,7 +620,10 @@ def test_skiprows_by_index_inference():
617620
456 78 9 456
618621
""".strip()
619622
skiprows = [0, 2]
620-
expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True)
623+
624+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
625+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
626+
expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True)
621627

622628
result = read_fwf(StringIO(data), skiprows=skiprows)
623629
tm.assert_frame_equal(result, expected)

pandas/tests/io/parser/test_skiprows.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -216,12 +216,17 @@ def test_skiprows_lineterminator(all_parsers, lineterminator, request):
216216
request.applymarker(mark)
217217

218218
data = data.replace("\n", lineterminator)
219-
result = parser.read_csv(
220-
StringIO(data),
221-
skiprows=1,
222-
delim_whitespace=True,
223-
names=["date", "time", "var", "flag", "oflag"],
224-
)
219+
220+
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
221+
with tm.assert_produces_warning(
222+
FutureWarning, match=depr_msg, check_stacklevel=False
223+
):
224+
result = parser.read_csv(
225+
StringIO(data),
226+
skiprows=1,
227+
delim_whitespace=True,
228+
names=["date", "time", "var", "flag", "oflag"],
229+
)
225230
tm.assert_frame_equal(result, expected)
226231

227232

0 commit comments

Comments
 (0)