Skip to content

Commit 91451cb

Browse files
minggli authored and jreback committed
BUG/REG: file-handle object handled incorrectly in to_csv (pandas-dev#21478)
1 parent 076635a commit 91451cb

File tree

8 files changed

+91
-48
lines changed

8 files changed

+91
-48
lines changed

doc/source/whatsnew/v0.23.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ and bug fixes. We recommend that all users upgrade to this version.
1616
Fixed Regressions
1717
~~~~~~~~~~~~~~~~~
1818

19-
-
19+
- Fixed regression in :meth:`to_csv` where file-like objects were handled incorrectly (:issue:`21471`)
2020
-
2121

2222
.. _whatsnew_0232.performance:

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1690,7 +1690,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
16901690
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
16911691
compression : string, optional
16921692
A string representing the compression to use in the output file.
1693-
Allowed values are 'gzip', 'bz2', 'zip', 'xz'.
1693+
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
1694+
used when the first argument is a filename.
16941695
line_terminator : string, default ``'\n'``
16951696
The newline character or character sequence to use in the output
16961697
file

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3790,7 +3790,8 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
37903790
non-ascii, for python versions prior to 3
37913791
compression : string, optional
37923792
A string representing the compression to use in the output file.
3793-
Allowed values are 'gzip', 'bz2', 'zip', 'xz'.
3793+
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
3794+
used when the first argument is a filename.
37943795
date_format: string, default None
37953796
Format string for datetime objects.
37963797
decimal: string, default '.'

pandas/io/common.py

+4
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,10 @@ def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
445445
def write(self, data):
446446
super(BytesZipFile, self).writestr(self.filename, data)
447447

448+
@property
449+
def closed(self):
450+
return self.fp is None
451+
448452

449453
class MMapWrapper(BaseIterator):
450454
"""

pandas/io/formats/csvs.py

+39-20
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55

66
from __future__ import print_function
77

8+
import warnings
9+
810
import csv as csvlib
11+
from zipfile import ZipFile
912
import numpy as np
1013

1114
from pandas.core.dtypes.missing import notna
12-
from pandas.core.dtypes.inference import is_file_like
1315
from pandas.core.index import Index, MultiIndex
1416
from pandas import compat
1517
from pandas.compat import (StringIO, range, zip)
@@ -128,19 +130,31 @@ def save(self):
128130
else:
129131
encoding = self.encoding
130132

131-
# PR 21300 uses string buffer to receive csv writing and dump into
132-
# file-like output with compression as option. GH 21241, 21118
133-
f = StringIO()
134-
if not is_file_like(self.path_or_buf):
135-
# path_or_buf is path
136-
path_or_buf = self.path_or_buf
137-
elif hasattr(self.path_or_buf, 'name'):
138-
# path_or_buf is file handle
139-
path_or_buf = self.path_or_buf.name
140-
else:
141-
# path_or_buf is file-like IO objects.
133+
# GH 21227 internal compression is not used when file-like passed.
134+
if self.compression and hasattr(self.path_or_buf, 'write'):
135+
msg = ("compression has no effect when passing file-like "
136+
"object as input.")
137+
warnings.warn(msg, RuntimeWarning, stacklevel=2)
138+
139+
# when zip compression is called.
140+
is_zip = isinstance(self.path_or_buf, ZipFile) or (
141+
not hasattr(self.path_or_buf, 'write')
142+
and self.compression == 'zip')
143+
144+
if is_zip:
145+
# zipfile doesn't support writing string to archive. uses string
146+
# buffer to receive csv writing and dump into zip compression
147+
# file handle. GH 21241, 21118
148+
f = StringIO()
149+
close = False
150+
elif hasattr(self.path_or_buf, 'write'):
142151
f = self.path_or_buf
143-
path_or_buf = None
152+
close = False
153+
else:
154+
f, handles = _get_handle(self.path_or_buf, self.mode,
155+
encoding=encoding,
156+
compression=self.compression)
157+
close = True
144158

145159
try:
146160
writer_kwargs = dict(lineterminator=self.line_terminator,
@@ -157,13 +171,18 @@ def save(self):
157171
self._save()
158172

159173
finally:
160-
# GH 17778 handles zip compression for byte strings separately.
161-
buf = f.getvalue()
162-
if path_or_buf:
163-
f, handles = _get_handle(path_or_buf, self.mode,
164-
encoding=encoding,
165-
compression=self.compression)
166-
f.write(buf)
174+
if is_zip:
175+
# GH 17778 handles zip compression separately.
176+
buf = f.getvalue()
177+
if hasattr(self.path_or_buf, 'write'):
178+
self.path_or_buf.write(buf)
179+
else:
180+
f, handles = _get_handle(self.path_or_buf, self.mode,
181+
encoding=encoding,
182+
compression=self.compression)
183+
f.write(buf)
184+
close = True
185+
if close:
167186
f.close()
168187
for _fh in handles:
169188
_fh.close()

pandas/tests/frame/test_to_csv.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import numpy as np
1010

1111
from pandas.compat import (lmap, range, lrange, StringIO, u)
12+
from pandas.io.common import _get_handle
1213
import pandas.core.common as com
1314
from pandas.errors import ParserError
1415
from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
@@ -935,18 +936,19 @@ def test_to_csv_compression(self, df, encoding, compression):
935936
with ensure_clean() as filename:
936937

937938
df.to_csv(filename, compression=compression, encoding=encoding)
938-
939939
# test the round trip - to_csv -> read_csv
940940
result = read_csv(filename, compression=compression,
941941
index_col=0, encoding=encoding)
942+
assert_frame_equal(df, result)
942943

943-
with open(filename, 'w') as fh:
944-
df.to_csv(fh, compression=compression, encoding=encoding)
945-
946-
result_fh = read_csv(filename, compression=compression,
947-
index_col=0, encoding=encoding)
944+
# test the round trip using file handle - to_csv -> read_csv
945+
f, _handles = _get_handle(filename, 'w', compression=compression,
946+
encoding=encoding)
947+
with f:
948+
df.to_csv(f, encoding=encoding)
949+
result = pd.read_csv(filename, compression=compression,
950+
encoding=encoding, index_col=0, squeeze=True)
948951
assert_frame_equal(df, result)
949-
assert_frame_equal(df, result_fh)
950952

951953
# explicitly make sure file is compressed
952954
with tm.decompress_file(filename, compression) as fh:

pandas/tests/series/test_io.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pandas import Series, DataFrame
1212

1313
from pandas.compat import StringIO, u
14+
from pandas.io.common import _get_handle
1415
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
1516
assert_frame_equal, ensure_clean)
1617
import pandas.util.testing as tm
@@ -151,20 +152,19 @@ def test_to_csv_compression(self, s, encoding, compression):
151152

152153
s.to_csv(filename, compression=compression, encoding=encoding,
153154
header=True)
154-
155155
# test the round trip - to_csv -> read_csv
156156
result = pd.read_csv(filename, compression=compression,
157157
encoding=encoding, index_col=0, squeeze=True)
158+
assert_series_equal(s, result)
158159

159-
with open(filename, 'w') as fh:
160-
s.to_csv(fh, compression=compression, encoding=encoding,
161-
header=True)
162-
163-
result_fh = pd.read_csv(filename, compression=compression,
164-
encoding=encoding, index_col=0,
165-
squeeze=True)
160+
# test the round trip using file handle - to_csv -> read_csv
161+
f, _handles = _get_handle(filename, 'w', compression=compression,
162+
encoding=encoding)
163+
with f:
164+
s.to_csv(f, encoding=encoding, header=True)
165+
result = pd.read_csv(filename, compression=compression,
166+
encoding=encoding, index_col=0, squeeze=True)
166167
assert_series_equal(s, result)
167-
assert_series_equal(s, result_fh)
168168

169169
# explicitly ensure file was compressed
170170
with tm.decompress_file(filename, compression) as fh:

pandas/tests/test_common.py

+25-9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pandas.compat import range, lmap
1212
import pandas.core.common as com
1313
from pandas.core import ops
14+
from pandas.io.common import _get_handle
1415
import pandas.util.testing as tm
1516

1617

@@ -246,19 +247,34 @@ def test_compression_size(obj, method, compression_only):
246247
[12.32112, 123123.2, 321321.2]],
247248
columns=['X', 'Y', 'Z']),
248249
Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
249-
@pytest.mark.parametrize('method', ['to_csv'])
250+
@pytest.mark.parametrize('method', ['to_csv', 'to_json'])
250251
def test_compression_size_fh(obj, method, compression_only):
251252

252253
with tm.ensure_clean() as filename:
253-
with open(filename, 'w') as fh:
254-
getattr(obj, method)(fh, compression=compression_only)
255-
assert not fh.closed
256-
assert fh.closed
254+
f, _handles = _get_handle(filename, 'w', compression=compression_only)
255+
with f:
256+
getattr(obj, method)(f)
257+
assert not f.closed
258+
assert f.closed
257259
compressed = os.path.getsize(filename)
258260
with tm.ensure_clean() as filename:
259-
with open(filename, 'w') as fh:
260-
getattr(obj, method)(fh, compression=None)
261-
assert not fh.closed
262-
assert fh.closed
261+
f, _handles = _get_handle(filename, 'w', compression=None)
262+
with f:
263+
getattr(obj, method)(f)
264+
assert not f.closed
265+
assert f.closed
263266
uncompressed = os.path.getsize(filename)
264267
assert uncompressed > compressed
268+
269+
270+
# GH 21227
271+
def test_compression_warning(compression_only):
272+
df = DataFrame(100 * [[0.123456, 0.234567, 0.567567],
273+
[12.32112, 123123.2, 321321.2]],
274+
columns=['X', 'Y', 'Z'])
275+
with tm.ensure_clean() as filename:
276+
f, _handles = _get_handle(filename, 'w', compression=compression_only)
277+
with tm.assert_produces_warning(RuntimeWarning,
278+
check_stacklevel=False):
279+
with f:
280+
df.to_csv(f, compression=compression_only)

0 commit comments

Comments
 (0)