Skip to content

Commit 6587970

Browse files
author
Oleh Kozynets
committed
Fix delim_whitespace behavior in read_table, read_csv
1 parent 027f365 commit 6587970

File tree

2 files changed

+66
-14
lines changed

2 files changed

+66
-14
lines changed

pandas/io/parsers.py

+18-14
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
542542
)
543543
def read_csv(
544544
filepath_or_buffer: FilePathOrBuffer,
545-
sep=",",
545+
sep=lib.no_default,
546546
delimiter=None,
547547
# Column and Index Locations and Names
548548
header="infer",
@@ -612,10 +612,8 @@ def read_csv(
612612
# Thus, we need a flag to indicate that we need to "override"
613613
# the comparison to dialect values by checking if default values
614614
# for BOTH "delimiter" and "sep" were provided.
615-
default_sep = ","
616-
617615
if dialect is not None:
618-
sep_override = delimiter is None and sep == default_sep
616+
sep_override = (delimiter is None) and (sep is lib.no_default)
619617
kwds = dict(sep_override=sep_override)
620618
else:
621619
kwds = dict()
@@ -624,12 +622,16 @@ def read_csv(
624622
if delimiter is None:
625623
delimiter = sep
626624

627-
if delim_whitespace and delimiter != default_sep:
625+
if delim_whitespace and (delimiter is not lib.no_default):
628626
raise ValueError(
629627
"Specified a delimiter with both sep and "
630628
"delim_whitespace=True; you can only specify one."
631629
)
632630

631+
if delimiter is lib.no_default:
632+
# assign default separator value
633+
delimiter = ","
634+
633635
if engine is not None:
634636
engine_specified = True
635637
else:
@@ -700,7 +702,7 @@ def read_csv(
700702
)
701703
def read_table(
702704
filepath_or_buffer: FilePathOrBuffer,
703-
sep="\t",
705+
sep=lib.no_default,
704706
delimiter=None,
705707
# Column and Index Locations and Names
706708
header="infer",
@@ -758,15 +760,17 @@ def read_table(
758760
float_precision=None,
759761
):
760762
# TODO: validation duplicated in read_csv
761-
if delim_whitespace and (delimiter is not None or sep != "\t"):
762-
raise ValueError(
763-
"Specified a delimiter with both sep and "
764-
"delim_whitespace=True; you can only specify one."
765-
)
766763
if delim_whitespace:
767-
# In this case sep is not used so we set it to the read_csv
768-
# default to avoid a ValueError
769-
sep = ","
764+
if (delimiter is not None) or (sep is not lib.no_default):
765+
raise ValueError(
766+
"Specified a delimiter with both sep and "
767+
"delim_whitespace=True; you can only specify one."
768+
)
769+
else:
770+
if sep is lib.no_default:
771+
# assign default delimiter value
772+
sep = "\t"
773+
770774
return read_csv(**locals())
771775

772776

pandas/tests/io/parser/test_common.py

+48
Original file line numberDiff line numberDiff line change
@@ -2211,6 +2211,21 @@ def test_read_table_delim_whitespace_default_sep(all_parsers):
22112211
tm.assert_frame_equal(result, expected)
22122212

22132213

2214+
def test_read_csv_delim_whitespace_non_default_sep(all_parsers):
2215+
# GH: 35958
2216+
f = StringIO("a b c\n1 -2 -3\n4 5 6")
2217+
parser = all_parsers
2218+
msg = (
2219+
"Specified a delimiter with both sep and "
2220+
"delim_whitespace=True; you can only specify one."
2221+
)
2222+
with pytest.raises(ValueError, match=msg):
2223+
parser.read_csv(f, delim_whitespace=True, sep="\t")
2224+
2225+
with pytest.raises(ValueError, match=msg):
2226+
parser.read_csv(f, delim_whitespace=True, delimiter="\t")
2227+
2228+
22142229
def test_read_table_delim_whitespace_non_default_sep(all_parsers):
22152230
# GH: 35958
22162231
f = StringIO("a b c\n1 -2 -3\n4 5 6")
@@ -2221,3 +2236,36 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers):
22212236
)
22222237
with pytest.raises(ValueError, match=msg):
22232238
parser.read_table(f, delim_whitespace=True, sep=",")
2239+
2240+
with pytest.raises(ValueError, match=msg):
2241+
parser.read_table(f, delim_whitespace=True, delimiter=",")
2242+
2243+
2244+
def test_read_csv_delim_whitespace_explicit_default_sep(all_parsers):
2245+
# GH: 35958
2246+
f = StringIO("a b c\n1 -2 -3\n4 5 6")
2247+
parser = all_parsers
2248+
msg = (
2249+
"Specified a delimiter with both sep and "
2250+
"delim_whitespace=True; you can only specify one."
2251+
)
2252+
with pytest.raises(ValueError, match=msg):
2253+
parser.read_csv(f, delim_whitespace=True, sep=",")
2254+
2255+
with pytest.raises(ValueError, match=msg):
2256+
parser.read_csv(f, delim_whitespace=True, delimiter=",")
2257+
2258+
2259+
def test_read_table_delim_whitespace_explicit_default_sep(all_parsers):
2260+
# GH: 35958
2261+
f = StringIO("a b c\n1 -2 -3\n4 5 6")
2262+
parser = all_parsers
2263+
msg = (
2264+
"Specified a delimiter with both sep and "
2265+
"delim_whitespace=True; you can only specify one."
2266+
)
2267+
with pytest.raises(ValueError, match=msg):
2268+
parser.read_table(f, delim_whitespace=True, sep="\t")
2269+
2270+
with pytest.raises(ValueError, match=msg):
2271+
parser.read_table(f, delim_whitespace=True, delimiter="\t")

0 commit comments

Comments
 (0)