Skip to content

Commit 17440b6

Browse files
committed
BUG: fixed line terminator behavior on Windows
* related issue: pandas-dev#20353
1 parent 3471b98 commit 17440b6

File tree

3 files changed

+75
-17
lines changed

3 files changed

+75
-17
lines changed

pandas/io/common.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import csv
55
import codecs
66
import mmap
7+
import io
78
from contextlib import contextmanager, closing
89
from zipfile import ZipFile
910

@@ -394,13 +395,13 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
394395
elif is_path:
395396
if compat.PY2:
396397
# Python 2
397-
f = open(path_or_buf, mode)
398+
f = io.open(path_or_buf, mode, newline='\n')
398399
elif encoding:
399400
# Python 3 and encoding
400-
f = open(path_or_buf, mode, encoding=encoding)
401+
f = open(path_or_buf, mode, encoding=encoding, newline='\n')
401402
elif is_text:
402403
# Python 3 and no explicit encoding
403-
f = open(path_or_buf, mode, errors='replace')
404+
f = open(path_or_buf, mode, errors='replace', newline='\n')
404405
else:
405406
# Python 3 and binary mode
406407
f = open(path_or_buf, mode)

pandas/tests/frame/test_to_csv.py

+12 -14
Original file line number | Diff line number | Diff line change
@@ -871,24 +871,22 @@ def test_to_csv_index_no_leading_comma(self):
871871
assert buf.getvalue() == expected
872872

873873
def test_to_csv_line_terminators(self):
874+
# GH 20353
874875
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
875876
index=['one', 'two', 'three'])
876877

877-
buf = StringIO()
878-
df.to_csv(buf, line_terminator='\r\n')
879-
expected = (',A,B\r\n'
880-
'one,1,4\r\n'
881-
'two,2,5\r\n'
882-
'three,3,6\r\n')
883-
assert buf.getvalue() == expected
878+
with ensure_clean() as path:
879+
df.to_csv(path, line_terminator='\r\n')
884880

885-
buf = StringIO()
886-
df.to_csv(buf) # The default line terminator remains \n
887-
expected = (',A,B\n'
888-
'one,1,4\n'
889-
'two,2,5\n'
890-
'three,3,6\n')
891-
assert buf.getvalue() == expected
881+
expected = (b',A,B\r\none,1,4\r\ntwo,2,5\r\nthree,3,6\r\n')
882+
with open(path, mode='rb') as f:
883+
assert f.read() == expected
884+
885+
df.to_csv(path) # The default line terminator remains \n
886+
887+
expected = (b',A,B\none,1,4\ntwo,2,5\nthree,3,6\n')
888+
with open(path, mode='rb') as f:
889+
assert f.read() == expected
892890

893891
def test_to_csv_from_csv_categorical(self):
894892

pandas/tests/io/formats/test_to_csv.py

+59
Original file line number | Diff line number | Diff line change
@@ -285,3 +285,62 @@ def test_to_csv_string_array_utf8(self):
285285
df.to_csv(path, encoding='utf-8')
286286
with open(path, 'r') as f:
287287
assert f.read() == expected_utf8
288+
289+
def test_to_csv_string_with_lf(self):
290+
# GH 20353
291+
data = {
292+
'int': [1, 2, 3],
293+
'str_lf': ['abc', 'd\nef', 'g\nh\n\ni']
294+
}
295+
df = pd.DataFrame(data)
296+
297+
with tm.ensure_clean('lf_test.csv') as path:
298+
expected_bin = \
299+
b'int,str_lf\n' \
300+
b'1,abc\n' \
301+
b'2,"d\nef"\n' \
302+
b'3,"g\nh\n\ni"\n'
303+
304+
df.to_csv(path, index=False)
305+
with open(path, 'rb') as f:
306+
assert f.read() == expected_bin
307+
308+
# 'line_terminator' should not change inner element
309+
expected_bin = \
310+
b'int,str_lf\r\n' \
311+
b'1,abc\r\n' \
312+
b'2,"d\nef"\r\n' \
313+
b'3,"g\nh\n\ni"\r\n'
314+
315+
df.to_csv(path, line_terminator='\r\n', index=False)
316+
with open(path, 'rb') as f:
317+
assert f.read() == expected_bin
318+
319+
def test_to_csv_string_with_crlf(self):
320+
# GH 20353
321+
data = {
322+
'int': [1, 2, 3],
323+
'str_crlf': ['abc', 'd\r\nef', 'g\r\nh\r\n\r\ni']
324+
}
325+
df = pd.DataFrame(data)
326+
327+
with tm.ensure_clean('crlf_test.csv') as path:
328+
expected_bin = \
329+
b'int,str_crlf\n' \
330+
b'1,abc\n' \
331+
b'2,"d\r\nef"\n' \
332+
b'3,"g\r\nh\r\n\r\ni"\n'
333+
334+
df.to_csv(path, index=False)
335+
with open(path, 'rb') as f:
336+
assert f.read() == expected_bin
337+
338+
expected_bin = \
339+
b'int,str_crlf\r\n' \
340+
b'1,abc\r\n' \
341+
b'2,"d\r\nef"\r\n' \
342+
b'3,"g\r\nh\r\n\r\ni"\r\n'
343+
344+
df.to_csv(path, line_terminator='\r\n', index=False)
345+
with open(path, 'rb') as f:
346+
assert f.read() == expected_bin

0 commit comments

Comments
 (0)