Skip to content

Commit cfc787e

Browse files
minggli authored and jorisvandenbossche committed
BUG/REG: file-handle object handled incorrectly in to_csv (#21478)
(cherry picked from commit 91451cb)
1 parent 84ef7ba commit cfc787e

File tree

6 files changed

+87
-46
lines changed

6 files changed

+87
-46
lines changed

doc/source/whatsnew/v0.23.2.txt

+1 -1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ and bug fixes. We recommend that all users upgrade to this version.
1616
Fixed Regressions
1717
~~~~~~~~~~~~~~~~~
1818

19-
-
19+
- Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`)
2020
-
2121

2222
.. _whatsnew_0232.performance:

pandas/io/common.py

+4
Original file line number | Diff line number | Diff line change
@@ -445,6 +445,10 @@ def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
445445
def write(self, data):
446446
super(BytesZipFile, self).writestr(self.filename, data)
447447

448+
@property
449+
def closed(self):
450+
return self.fp is None
451+
448452

449453
class MMapWrapper(BaseIterator):
450454
"""

pandas/io/formats/csvs.py

+39 -20
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,13 @@
55

66
from __future__ import print_function
77

8+
import warnings
9+
810
import csv as csvlib
11+
from zipfile import ZipFile
912
import numpy as np
1013

1114
from pandas.core.dtypes.missing import notna
12-
from pandas.core.dtypes.inference import is_file_like
1315
from pandas.core.index import Index, MultiIndex
1416
from pandas import compat
1517
from pandas.compat import (StringIO, range, zip)
@@ -128,19 +130,31 @@ def save(self):
128130
else:
129131
encoding = self.encoding
130132

131-
# PR 21300 uses string buffer to receive csv writing and dump into
132-
# file-like output with compression as option. GH 21241, 21118
133-
f = StringIO()
134-
if not is_file_like(self.path_or_buf):
135-
# path_or_buf is path
136-
path_or_buf = self.path_or_buf
137-
elif hasattr(self.path_or_buf, 'name'):
138-
# path_or_buf is file handle
139-
path_or_buf = self.path_or_buf.name
140-
else:
141-
# path_or_buf is file-like IO objects.
133+
# GH 21227 internal compression is not used when file-like passed.
134+
if self.compression and hasattr(self.path_or_buf, 'write'):
135+
msg = ("compression has no effect when passing file-like "
136+
"object as input.")
137+
warnings.warn(msg, RuntimeWarning, stacklevel=2)
138+
139+
# when zip compression is called.
140+
is_zip = isinstance(self.path_or_buf, ZipFile) or (
141+
not hasattr(self.path_or_buf, 'write')
142+
and self.compression == 'zip')
143+
144+
if is_zip:
145+
# zipfile doesn't support writing string to archive. uses string
146+
# buffer to receive csv writing and dump into zip compression
147+
# file handle. GH 21241, 21118
148+
f = StringIO()
149+
close = False
150+
elif hasattr(self.path_or_buf, 'write'):
142151
f = self.path_or_buf
143-
path_or_buf = None
152+
close = False
153+
else:
154+
f, handles = _get_handle(self.path_or_buf, self.mode,
155+
encoding=encoding,
156+
compression=self.compression)
157+
close = True
144158

145159
try:
146160
writer_kwargs = dict(lineterminator=self.line_terminator,
@@ -157,13 +171,18 @@ def save(self):
157171
self._save()
158172

159173
finally:
160-
# GH 17778 handles zip compression for byte strings separately.
161-
buf = f.getvalue()
162-
if path_or_buf:
163-
f, handles = _get_handle(path_or_buf, self.mode,
164-
encoding=encoding,
165-
compression=self.compression)
166-
f.write(buf)
174+
if is_zip:
175+
# GH 17778 handles zip compression separately.
176+
buf = f.getvalue()
177+
if hasattr(self.path_or_buf, 'write'):
178+
self.path_or_buf.write(buf)
179+
else:
180+
f, handles = _get_handle(self.path_or_buf, self.mode,
181+
encoding=encoding,
182+
compression=self.compression)
183+
f.write(buf)
184+
close = True
185+
if close:
167186
f.close()
168187
for _fh in handles:
169188
_fh.close()

pandas/tests/frame/test_to_csv.py

+9 -7
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import numpy as np
1010

1111
from pandas.compat import (lmap, range, lrange, StringIO, u)
12+
from pandas.io.common import _get_handle
1213
import pandas.core.common as com
1314
from pandas.errors import ParserError
1415
from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
@@ -935,18 +936,19 @@ def test_to_csv_compression(self, df, encoding, compression):
935936
with ensure_clean() as filename:
936937

937938
df.to_csv(filename, compression=compression, encoding=encoding)
938-
939939
# test the round trip - to_csv -> read_csv
940940
result = read_csv(filename, compression=compression,
941941
index_col=0, encoding=encoding)
942+
assert_frame_equal(df, result)
942943

943-
with open(filename, 'w') as fh:
944-
df.to_csv(fh, compression=compression, encoding=encoding)
945-
946-
result_fh = read_csv(filename, compression=compression,
947-
index_col=0, encoding=encoding)
944+
# test the round trip using file handle - to_csv -> read_csv
945+
f, _handles = _get_handle(filename, 'w', compression=compression,
946+
encoding=encoding)
947+
with f:
948+
df.to_csv(f, encoding=encoding)
949+
result = pd.read_csv(filename, compression=compression,
950+
encoding=encoding, index_col=0, squeeze=True)
948951
assert_frame_equal(df, result)
949-
assert_frame_equal(df, result_fh)
950952

951953
# explicitly make sure file is compressed
952954
with tm.decompress_file(filename, compression) as fh:

pandas/tests/series/test_io.py

+9 -9
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from pandas import Series, DataFrame
1212

1313
from pandas.compat import StringIO, u
14+
from pandas.io.common import _get_handle
1415
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
1516
assert_frame_equal, ensure_clean)
1617
import pandas.util.testing as tm
@@ -152,20 +153,19 @@ def test_to_csv_compression(self, s, encoding, compression):
152153

153154
s.to_csv(filename, compression=compression, encoding=encoding,
154155
header=True)
155-
156156
# test the round trip - to_csv -> read_csv
157157
result = pd.read_csv(filename, compression=compression,
158158
encoding=encoding, index_col=0, squeeze=True)
159+
assert_series_equal(s, result)
159160

160-
with open(filename, 'w') as fh:
161-
s.to_csv(fh, compression=compression, encoding=encoding,
162-
header=True)
163-
164-
result_fh = pd.read_csv(filename, compression=compression,
165-
encoding=encoding, index_col=0,
166-
squeeze=True)
161+
# test the round trip using file handle - to_csv -> read_csv
162+
f, _handles = _get_handle(filename, 'w', compression=compression,
163+
encoding=encoding)
164+
with f:
165+
s.to_csv(f, encoding=encoding, header=True)
166+
result = pd.read_csv(filename, compression=compression,
167+
encoding=encoding, index_col=0, squeeze=True)
167168
assert_series_equal(s, result)
168-
assert_series_equal(s, result_fh)
169169

170170
# explicitly ensure file was compressed
171171
with tm.decompress_file(filename, compression) as fh:

pandas/tests/test_common.py

+25 -9
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from pandas.compat import range, lmap
1212
import pandas.core.common as com
1313
from pandas.core import ops
14+
from pandas.io.common import _get_handle
1415
import pandas.util.testing as tm
1516

1617

@@ -248,19 +249,34 @@ def test_compression_size(obj, method, compression):
248249
[12.32112, 123123.2, 321321.2]],
249250
columns=['X', 'Y', 'Z']),
250251
Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
251-
@pytest.mark.parametrize('method', ['to_csv'])
252+
@pytest.mark.parametrize('method', ['to_csv', 'to_json'])
252253
def test_compression_size_fh(obj, method, compression_only):
253254

254255
with tm.ensure_clean() as filename:
255-
with open(filename, 'w') as fh:
256-
getattr(obj, method)(fh, compression=compression_only)
257-
assert not fh.closed
258-
assert fh.closed
256+
f, _handles = _get_handle(filename, 'w', compression=compression_only)
257+
with f:
258+
getattr(obj, method)(f)
259+
assert not f.closed
260+
assert f.closed
259261
compressed = os.path.getsize(filename)
260262
with tm.ensure_clean() as filename:
261-
with open(filename, 'w') as fh:
262-
getattr(obj, method)(fh, compression=None)
263-
assert not fh.closed
264-
assert fh.closed
263+
f, _handles = _get_handle(filename, 'w', compression=None)
264+
with f:
265+
getattr(obj, method)(f)
266+
assert not f.closed
267+
assert f.closed
265268
uncompressed = os.path.getsize(filename)
266269
assert uncompressed > compressed
270+
271+
272+
# GH 21227
273+
def test_compression_warning(compression_only):
274+
df = DataFrame(100 * [[0.123456, 0.234567, 0.567567],
275+
[12.32112, 123123.2, 321321.2]],
276+
columns=['X', 'Y', 'Z'])
277+
with tm.ensure_clean() as filename:
278+
f, _handles = _get_handle(filename, 'w', compression=compression_only)
279+
with tm.assert_produces_warning(RuntimeWarning,
280+
check_stacklevel=False):
281+
with f:
282+
df.to_csv(f, compression=compression_only)

0 commit comments

Comments
 (0)