From ecaef69933d364ad196d535c72c154a5b8b633e0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 27 Oct 2016 14:49:01 +0200 Subject: [PATCH 1/2] BUG: don't close user-provided file handles in C parser (GH14418) --- doc/source/whatsnew/v0.19.1.txt | 1 + pandas/io/parsers.py | 2 ++ pandas/io/tests/parser/common.py | 6 ++++++ pandas/parser.pyx | 9 ++++----- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 7594478ada41a..76f5a532f283a 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -37,6 +37,7 @@ Bug Fixes - Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`) +- Fixed regression where user-provided file handles were closed in ``read_csv`` (c engine) (:issue:`14418`). - Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`) - Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index e0127c3544971..1f2145da6bca7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1456,6 +1456,8 @@ def __init__(self, src, **kwds): def close(self): for f in self.handles: f.close() + + # close additional handles opened by C parser (for compression) try: self._reader.close() except: diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 0219e16391be8..f2d4137e94f23 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1602,3 +1602,9 @@ def test_internal_eof_byte(self): expected = pd.DataFrame([["1\x1a", 2]], columns=['a', 'b']) result = self.read_csv(StringIO(data)) tm.assert_frame_equal(result, expected) + + def test_file_handles(self): + # GH 14418 - don't close user provided file handles + fh = StringIO('a,b\n1,2') + self.read_csv(fh) + self.assertFalse(fh.closed) diff --git a/pandas/parser.pyx b/pandas/parser.pyx index 0a2824e74120c..93a494c176b99 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -272,7 +272,7 @@ cdef class TextReader: parser_t *parser object file_handle, na_fvalues object true_values, false_values - object dsource + object handle bint na_filter, verbose, has_usecols, has_mi_columns int parser_start list clocks @@ -554,9 +554,9 @@ cdef class TextReader: def close(self): # we need to properly close an open derived # filehandle here, e.g. and UTFRecoder - if self.dsource is not None: + if self.handle is not None: try: - self.dsource.close() + self.handle.close() except: pass @@ -641,6 +641,7 @@ cdef class TextReader: else: raise ValueError('Unrecognized compression type: %s' % self.compression) + self.handle = source if isinstance(source, basestring): if not isinstance(source, bytes): @@ -684,8 +685,6 @@ cdef class TextReader: raise IOError('Expected file path name or file-like object,' ' got %s type' % type(source)) - self.dsource = source - cdef _get_header(self): # header is now a list of lists, so field_count should use header[0] From 73510b68589a66f62f3cf3d3723042334908186d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 28 Oct 2016 10:12:45 +0200 Subject: [PATCH 2/2] add file/mmap test cases --- pandas/io/tests/parser/common.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index f2d4137e94f23..85ce4e42e0e28 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1605,6 +1605,23 @@ def test_internal_eof_byte(self): def test_file_handles(self): # GH 14418 - don't close user provided file handles + fh = StringIO('a,b\n1,2') self.read_csv(fh) self.assertFalse(fh.closed) + + with open(self.csv1, 'r') as f: + self.read_csv(f) + self.assertFalse(f.closed) + + # mmap not working with python engine + if self.engine != 'python': + + import mmap + with open(self.csv1, 'r') as f: + m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + self.read_csv(m) + # closed attribute new in python 3.2 + if PY3: + self.assertFalse(m.closed) + m.close()