Skip to content

Commit 7db128a

Browse files
avinashpancham and pull[bot]
authored and committed
TST: Verify parsing of data with encoded special characters (16218) (#36841)
* TST: Verify parsing of data with encoded special characters * Move
1 parent e628728 commit 7db128a

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

pandas/tests/io/parser/test_encoding.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import numpy as np
1111
import pytest
1212

13-
from pandas import DataFrame
13+
from pandas import DataFrame, read_csv
1414
import pandas._testing as tm
1515

1616

@@ -199,3 +199,17 @@ def test_encoding_named_temp_file(all_parsers):
199199

200200
result = parser.read_csv(f, encoding=encoding)
201201
tm.assert_frame_equal(result, expected)
202+
203+
204+
@pytest.mark.parametrize(
    "encoding", ["utf-8", "utf-16", "utf-16-be", "utf-16-le", "utf-32"]
)
def test_parse_encoded_special_characters(encoding):
    """Check that read_csv decodes a non-ASCII character in each encoding.

    The tab-separated payload is encoded with ``encoding``, wrapped in a
    ``BytesIO`` and parsed back with the same ``encoding``; the resulting
    frame must match the original text exactly.
    """
    # GH16218 Verify parsing of data with encoded special characters
    # Data contains a Unicode 'FULLWIDTH COLON' (U+FF1A) at position (0,"a").
    # It is spelled as an escape sequence on purpose: a literal fullwidth
    # colon is easily normalised to an ASCII ':' by tooling, which would leave
    # the test passing without exercising any multi-byte character at all.
    data = "a\tb\n\uff1afoo\t0\nbar\t1\nbaz\t2"
    encoded_data = BytesIO(data.encode(encoding))
    result = read_csv(encoded_data, delimiter="\t", encoding=encoding)

    expected = DataFrame(
        data=[["\uff1afoo", 0], ["bar", 1], ["baz", 2]], columns=["a", "b"]
    )
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)