Skip to content

Commit 4f3d833

Browse files
committed
TST: Make encoded sep check more locale sensitive
Closes gh-14140.
1 parent 02df7b6 commit 4f3d833

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

pandas/io/parsers.py

+13 -8
Original file line number | Diff line number | Diff line change
@@ -800,17 +800,22 @@ def _clean_options(self, options, engine):
800800
" different from '\s+' are"\
801801
" interpreted as regex)"
802802
engine = 'python'
803-
804-
elif len(sep.encode(encoding)) > 1:
805-
if engine not in ('python', 'python-fwf'):
806-
fallback_reason = "the separator encoded in {encoding}"\
807-
" is > 1 char long, and the 'c' engine"\
808-
" does not support such separators".format(
809-
encoding=encoding)
810-
engine = 'python'
811803
elif delim_whitespace:
812804
if 'python' in engine:
813805
result['delimiter'] = '\s+'
806+
elif sep is not None:
807+
encodeable = True
808+
try:
809+
if len(sep.encode(encoding)) > 1:
810+
encodeable = False
811+
except UnicodeDecodeError:
812+
encodeable = False
813+
if not encodeable and engine not in ('python', 'python-fwf'):
814+
fallback_reason = "the separator encoded in {encoding}" \
815+
" is > 1 char long, and the 'c' engine" \
816+
" does not support such separators".format(
817+
encoding=encoding)
818+
engine = 'python'
814819

815820
if fallback_reason and engine_specified:
816821
raise ValueError(fallback_reason)

pandas/io/tests/parser/test_unsupported.py

-4
Original file line number | Diff line number | Diff line change
@@ -60,10 +60,6 @@ def test_c_engine(self):
6060
sep=None, delim_whitespace=False)
6161
with tm.assertRaisesRegexp(ValueError, msg):
6262
read_table(StringIO(data), engine='c', sep='\s')
63-
64-
# GH 14120, skipping as failing when locale is set
65-
# with tm.assertRaisesRegexp(ValueError, msg):
66-
# read_table(StringIO(data), engine='c', sep='§')
6763
with tm.assertRaisesRegexp(ValueError, msg):
6864
read_table(StringIO(data), engine='c', skipfooter=1)
6965

0 commit comments

Comments
 (0)