Skip to content

Commit 9c88919

Browse files
committed
BUG: read_csv throws UnicodeDecodeError with unicode aliases
read_csv with engine='c' throws an error when encoding='UTF_16', or more generally when the encoding name contains an underscore or uppercase letters. Improved the testing loops and added multibyte testing. See issue pandas-dev#13549.
1 parent 75869f4 commit 9c88919

File tree

1 file changed

+15
-26
lines changed

1 file changed

+15
-26
lines changed

pandas/io/tests/parser/common.py

+15-26
Original file line numberDiff line numberDiff line change
@@ -1470,33 +1470,22 @@ def test_memory_map(self):
14701470
out = self.read_csv(mmap_file, memory_map=True)
14711471
tm.assert_frame_equal(out, expected)
14721472

1473-
def test_read_csv_utf_aliases():
1473+
def test_read_csv_utf_aliases(self):
14741474
# see gh issue 13549
1475-
engines = ['c', 'python', None]
14761475
path = 'test.csv'
1477-
expected = DataFrame({"A": [0, 1], "B": [2, 3]})
1478-
expected.to_csv(path, encoding='utf-8', index=False)
1479-
test_encodings = ['utf-8', 'utf_8', 'UTF_8', 'UTF-8']
1480-
1481-
for encoding in test_encodings:
1482-
for engine in engines:
1483-
out = pd.io.parsers.read_csv(
1484-
path,
1485-
engine=engine,
1486-
encoding=encoding)
1487-
tm.assert_frame_equal(out, expected)
1488-
1489-
os.remove("test.csv")
1490-
1491-
expected.to_csv(path, encoding='utf-16', index=False)
1492-
test_encodings = ['utf-16', 'utf_16', 'UTF_16', 'UTF-16']
1493-
1494-
for encoding in test_encodings:
1495-
for engine in engines:
1496-
out = pd.io.parsers.read_csv(
1497-
path,
1498-
engine=engine,
1499-
encoding=encoding)
1500-
tm.assert_frame_equal(out, expected)
1476+
expected = pd.DataFrame({'A': [0, 1], 'B': [2, 3],
1477+
'multibyte_test': ['testing123', 'bananabis'],
1478+
'mb_nums': [154.868, 457.8798]})
1479+
1480+
for byte in [8, 16]:
1481+
expected.to_csv(path, encoding='utf-' + str(byte), index=False)
1482+
for fmt in ['utf-{0}', 'utf_{0}', 'UTF-{0}', 'UTF_{0}']:
1483+
encoding = fmt.format(byte)
1484+
for engine in ['c', 'python', None]:
1485+
out = self.read_csv(
1486+
path,
1487+
engine=engine,
1488+
encoding=encoding)
1489+
tm.assert_frame_equal(out, expected)
15011490

15021491
os.remove("test.csv")

0 commit comments

Comments
 (0)