-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: encoding error in to_csv compression #21300
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
fdd3ce9
826aa2c
fac53c0
a4de620
b833abd
b625f08
9d5c25b
8ed6fa2
2d48d10
4a6f5ff
bf4225c
486f3ff
d8435ef
16cc951
6714e68
f73f9ff
f891533
f3c3ea7
4e914ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -919,29 +919,37 @@ def test_to_csv_path_is_none(self): | |
recons = pd.read_csv(StringIO(csv_str), index_col=0) | ||
assert_frame_equal(self.frame, recons) | ||
|
||
def test_to_csv_compression(self, compression): | ||
|
||
df = DataFrame([[0.123456, 0.234567, 0.567567], | ||
[12.32112, 123123.2, 321321.2]], | ||
index=['A', 'B'], columns=['X', 'Y', 'Z']) | ||
@pytest.mark.parametrize('frame, encoding', [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure. |
||
(DataFrame([[0.123456, 0.234567, 0.567567], | ||
[12.32112, 123123.2, 321321.2]], | ||
index=['A', 'B'], columns=['X', 'Y', 'Z']), None), | ||
(DataFrame([['abc', 'def', 'ghi']], columns=['X', 'Y', 'Z']), 'ascii'), | ||
(DataFrame(5 * [[123, u"你好", u"世界"]], | ||
columns=['X', 'Y', 'Z']), 'gb2312'), | ||
(DataFrame(5 * [[123, u"Γειά σου", u"Κόσμε"]], | ||
columns=['X', 'Y', 'Z']), 'cp737') | ||
]) | ||
def test_to_csv_compression(self, frame, encoding, compression): | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment as below: add the issue numbers as comments. |
||
with ensure_clean() as filename: | ||
|
||
df.to_csv(filename, compression=compression) | ||
frame.to_csv(filename, compression=compression, encoding=encoding) | ||
|
||
# test the round trip - to_csv -> read_csv | ||
rs = read_csv(filename, compression=compression, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you change There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
index_col=0) | ||
assert_frame_equal(df, rs) | ||
index_col=0, encoding=encoding) | ||
assert_frame_equal(frame, rs) | ||
|
||
# explicitly make sure file is compressed | ||
with tm.decompress_file(filename, compression) as fh: | ||
text = fh.read().decode('utf8') | ||
for col in df.columns: | ||
text = fh.read().decode(encoding or 'utf8') | ||
for col in frame.columns: | ||
assert col in text | ||
|
||
with tm.decompress_file(filename, compression) as fh: | ||
assert_frame_equal(df, read_csv(fh, index_col=0)) | ||
assert_frame_equal(frame, read_csv(fh, | ||
index_col=0, | ||
encoding=encoding)) | ||
|
||
def test_to_csv_date_format(self): | ||
with ensure_clean('__tmp_to_csv_date_format__') as path: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -137,29 +137,35 @@ def test_to_csv_path_is_none(self): | |
csv_str = s.to_csv(path=None) | ||
assert isinstance(csv_str, str) | ||
|
||
def test_to_csv_compression(self, compression): | ||
|
||
s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'], | ||
name='X') | ||
@pytest.mark.parametrize('s, encoding', [ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove whitespace between parameter names. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
(Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'], | ||
name='X'), None), | ||
(Series(['abc', 'def', 'ghi'], name='X'), 'ascii'), | ||
(Series(["123", u"你好", u"世界"], name=u"中文"), 'gb2312'), | ||
(Series(["123", u"Γειά σου", u"Κόσμε"], name=u"Ελληνικά"), 'cp737') | ||
]) | ||
def test_to_csv_compression(self, s, encoding, compression): | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add the relevant issues here as comments (e.g. the issues you are closing)? |
||
with ensure_clean() as filename: | ||
|
||
s.to_csv(filename, compression=compression, header=True) | ||
s.to_csv(filename, compression=compression, encoding=encoding, | ||
header=True) | ||
|
||
# test the round trip - to_csv -> read_csv | ||
rs = pd.read_csv(filename, compression=compression, | ||
index_col=0, squeeze=True) | ||
encoding=encoding, index_col=0, squeeze=True) | ||
assert_series_equal(s, rs) | ||
|
||
# explicitly ensure file was compressed | ||
with tm.decompress_file(filename, compression) as fh: | ||
text = fh.read().decode('utf8') | ||
text = fh.read().decode(encoding or 'utf8') | ||
assert s.name in text | ||
|
||
with tm.decompress_file(filename, compression) as fh: | ||
assert_series_equal(s, pd.read_csv(fh, | ||
index_col=0, | ||
squeeze=True)) | ||
squeeze=True, | ||
encoding=encoding)) | ||
|
||
|
||
class TestSeriesIO(TestData): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could also mention Series.to_csv here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added.