Skip to content

Commit d759156

Browse files
committed
BUG: properly close files opened by parsers
1 parent e89a0a0 commit d759156

File tree

6 files changed

+25
-10
lines changed

6 files changed

+25
-10
lines changed

doc/source/whatsnew/v0.19.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,8 @@ Bug Fixes
902902
- Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`)
903903
- Bug in ``pd.read_csv()`` with ``engine='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`)
904904
- Bug in ``pd.read_csv()`` with ``engine='c'`` in which fields were not properly cast to float when quoting was specified as non-numeric (:issue:`13411`)
905+
- Bug in ``pd.read_csv()``, ``pd.read_table()`` and ``pd.read_stata()`` where files were opened by parsers but not closed if both ``chunksize`` and ``iterator`` were ``None`` (:issue:`13940`)
906+
- Bug in ``StataReader`` and ``StataWriter`` where a file was not properly closed when an error was raised (:issue:`13940`)
905907
- Bug in ``pd.pivot_table()`` where ``margins_name`` is ignored when ``aggfunc`` is a list (:issue:`13354`)
906908
- Bug in ``pd.Series.str.zfill``, ``center``, ``ljust``, ``rjust``, and ``pad`` where passing non-integers did not raise ``TypeError`` (:issue:`13598`)
907909
- Bug in checking for any null objects in a ``TimedeltaIndex``, which always returned ``True`` (:issue:`13603`)

pandas/io/common.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,9 @@ def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
327327

328328
if memory_map and hasattr(f, 'fileno'):
329329
try:
330-
f = MMapWrapper(f)
330+
g = MMapWrapper(f)
331+
f.close()
332+
f = g
331333
except Exception:
332334
# we catch any errors that may have occurred
333335
# because that is consistent with the lower-level

pandas/io/parsers.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -393,11 +393,15 @@ def _read(filepath_or_buffer, kwds):
393393
raise NotImplementedError("'nrows' and 'chunksize' cannot be used"
394394
" together yet.")
395395
elif nrows is not None:
396-
return parser.read(nrows)
396+
data = parser.read(nrows)
397+
parser.close()
398+
return data
397399
elif chunksize or iterator:
398400
return parser
399401

400-
return parser.read()
402+
data = parser.read()
403+
parser.close()
404+
return data
401405

402406
_parser_defaults = {
403407
'delimiter': None,
@@ -727,10 +731,7 @@ def __init__(self, f, engine=None, **kwds):
727731
self._make_engine(self.engine)
728732

729733
def close(self):
730-
try:
731-
self._engine._reader.close()
732-
except:
733-
pass
734+
self._engine.close()
734735

735736
def _get_options_with_defaults(self, engine):
736737
kwds = self.orig_options
@@ -1057,8 +1058,13 @@ def __init__(self, kwds):
10571058

10581059
self._first_chunk = True
10591060

1061+
# GH 13932
1062+
# keep references to file handles opened by the parser itself
1063+
self.handles = []
1064+
10601065
def close(self):
1061-
self._reader.close()
1066+
for f in self.handles:
1067+
f.close()
10621068

10631069
@property
10641070
def _has_complex_date_col(self):
@@ -1356,6 +1362,7 @@ def __init__(self, src, **kwds):
13561362
if 'utf-16' in (kwds.get('encoding') or ''):
13571363
if isinstance(src, compat.string_types):
13581364
src = open(src, 'rb')
1365+
self.handles.append(src)
13591366
src = UTF8Recoder(src, kwds['encoding'])
13601367
kwds['encoding'] = 'utf-8'
13611368

@@ -1760,6 +1767,7 @@ def __init__(self, f, **kwds):
17601767
f = TextIOWrapper(f, encoding=self.encoding)
17611768

17621769
# Set self.data to something that can read lines.
1770+
self.handles.append(f)
17631771
if hasattr(f, 'readline'):
17641772
self._make_reader(f)
17651773
else:

pandas/io/tests/parser/common.py

+1
Original file line numberDiff line numberDiff line change
@@ -1580,5 +1580,6 @@ def test_temporary_file(self):
15801580
new_file.seek(0)
15811581

15821582
result = self.read_csv(new_file, sep='\s+', header=None)
1583+
new_file.close()
15831584
expected = DataFrame([[0, 0]])
15841585
tm.assert_frame_equal(result, expected)

pandas/io/tests/parser/python_parser_only.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ def test_decompression_regex_sep(self):
130130
except ImportError:
131131
raise nose.SkipTest('need gzip and bz2 to run')
132132

133-
data = open(self.csv1, 'rb').read()
133+
with open(self.csv1, 'rb') as f:
134+
data = f.read()
134135
data = data.replace(b',', b'::')
135136
expected = self.read_csv(self.csv1)
136137

pandas/io/tests/parser/test_textreader.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ def test_file_handle_mmap(self):
5454
f.close()
5555

5656
def test_StringIO(self):
57-
text = open(self.csv1, 'rb').read()
57+
with open(self.csv1, 'rb') as f:
58+
text = f.read()
5859
src = BytesIO(text)
5960
reader = TextReader(src, header=None)
6061
reader.read()

0 commit comments

Comments
 (0)