Skip to content

Commit e7d7872

Browse files
[Backport #14520] BUG: don't close user-provided file handles in C parser (GH14418) (#14520)
(cherry picked from commit eb7bd99)
1 parent 2ca29ef commit e7d7872

File tree

4 files changed

+30
-5
lines changed

4 files changed

+30
-5
lines changed

doc/source/whatsnew/v0.19.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Bug Fixes
3838

3939
- Bug in ``pd.read_csv`` for the C engine in which quotation marks were improperly parsed in skipped rows (:issue:`14459`)
4040
- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`)
41+
- Fixed regression where user-provided file handles were closed in ``read_csv`` (C engine) (:issue:`14418`).
4142
- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
4243
- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`)
4344

pandas/io/parsers.py

+2
Original file line numberDiff line numberDiff line change
@@ -1456,6 +1456,8 @@ def __init__(self, src, **kwds):
14561456
def close(self):
14571457
for f in self.handles:
14581458
f.close()
1459+
1460+
# close additional handles opened by C parser (for compression)
14591461
try:
14601462
self._reader.close()
14611463
except:

pandas/io/tests/parser/common.py

+23
Original file line numberDiff line numberDiff line change
@@ -1602,3 +1602,26 @@ def test_internal_eof_byte(self):
16021602
expected = pd.DataFrame([["1\x1a", 2]], columns=['a', 'b'])
16031603
result = self.read_csv(StringIO(data))
16041604
tm.assert_frame_equal(result, expected)
1605+
1606+
def test_file_handles(self):
1607+
# GH 14418 - don't close user-provided file handles
1608+
1609+
fh = StringIO('a,b\n1,2')
1610+
self.read_csv(fh)
1611+
self.assertFalse(fh.closed)
1612+
1613+
with open(self.csv1, 'r') as f:
1614+
self.read_csv(f)
1615+
self.assertFalse(f.closed)
1616+
1617+
# mmap not working with python engine
1618+
if self.engine != 'python':
1619+
1620+
import mmap
1621+
with open(self.csv1, 'r') as f:
1622+
m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
1623+
self.read_csv(m)
1624+
# closed attribute new in python 3.2
1625+
if PY3:
1626+
self.assertFalse(m.closed)
1627+
m.close()

pandas/parser.pyx

+4-5
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ cdef class TextReader:
272272
parser_t *parser
273273
object file_handle, na_fvalues
274274
object true_values, false_values
275-
object dsource
275+
object handle
276276
bint na_filter, verbose, has_usecols, has_mi_columns
277277
int parser_start
278278
list clocks
@@ -554,9 +554,9 @@ cdef class TextReader:
554554
def close(self):
555555
# we need to properly close an open derived
556556
# filehandle here, e.g. a UTFRecoder
557-
if self.dsource is not None:
557+
if self.handle is not None:
558558
try:
559-
self.dsource.close()
559+
self.handle.close()
560560
except:
561561
pass
562562

@@ -641,6 +641,7 @@ cdef class TextReader:
641641
else:
642642
raise ValueError('Unrecognized compression type: %s' %
643643
self.compression)
644+
self.handle = source
644645

645646
if isinstance(source, basestring):
646647
if not isinstance(source, bytes):
@@ -684,8 +685,6 @@ cdef class TextReader:
684685
raise IOError('Expected file path name or file-like object,'
685686
' got %s type' % type(source))
686687

687-
self.dsource = source
688-
689688
cdef _get_header(self):
690689
# header is now a list of lists, so field_count should use header[0]
691690

0 commit comments

Comments
 (0)