Skip to content

Commit 75869f4

Browse files
committed
BUG: read_csv throws UnicodeDecodeError with unicode
Improved testing, added utf-8 to the tests, and moved the tests to pandas/io/tests/parser/common.py; see issue #13549.
1 parent b8d78c4 commit 75869f4

File tree

2 files changed

+31
-32
lines changed

2 files changed

+31
-32
lines changed

pandas/io/tests/parser/common.py

+31
Original file line numberDiff line numberDiff line change
@@ -1469,3 +1469,34 @@ def test_memory_map(self):
14691469

14701470
out = self.read_csv(mmap_file, memory_map=True)
14711471
tm.assert_frame_equal(out, expected)
1472+
1473+
def test_read_csv_utf_aliases():
    """Check that ``read_csv`` accepts several spellings of UTF codec names.

    A small frame is written to a scratch CSV file once as utf-8 and once
    as utf-16.  Each file is then read back with every alias spelling
    (hyphen/underscore, lower/upper case) under each ``engine`` value
    ('c', 'python' and ``None``), and the result must equal the frame
    that was written.
    """
    # see gh issue 13549
    engines = ['c', 'python', None]
    path = 'test.csv'
    expected = DataFrame({"A": [0, 1], "B": [2, 3]})

    # (encoding used to write the file, alias spellings used to read it)
    alias_groups = [
        ('utf-8', ['utf-8', 'utf_8', 'UTF_8', 'UTF-8']),
        ('utf-16', ['utf-16', 'utf_16', 'UTF_16', 'UTF-16']),
    ]

    for written_as, test_encodings in alias_groups:
        try:
            expected.to_csv(path, encoding=written_as, index=False)

            for encoding in test_encodings:
                for engine in engines:
                    out = pd.io.parsers.read_csv(
                        path,
                        engine=engine,
                        encoding=encoding)
                    tm.assert_frame_equal(out, expected)
        finally:
            # Remove the scratch file even when a read or comparison
            # fails, so a failing run does not leave it behind in the
            # working directory.
            if os.path.exists(path):
                os.remove(path)

pandas/tests/io/test_encoding_aliases.py

-32
This file was deleted.

0 commit comments

Comments
 (0)