BUG: properly close files opened by parsers

agraboso · agraboso · commit d759156819fa · 2016-08-09T19:57:39.000-04:00
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -902,6 +902,8 @@ Bug Fixes
 - Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`)
 - Bug in ``pd.read_csv()`` with ``engine='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`)
 - Bug in ``pd.read_csv()`` with ``engine='c'`` in which fields were not properly cast to float when quoting was specified as non-numeric (:issue:`13411`)
+- Bug in ``pd.read_csv()``, ``pd.read_table()`` and ``pd.read_stata()`` where files were opened by parsers but not closed if both ``chunksize`` and ``iterator`` were ``None`` (:issue:`13940`)
+- Bug in ``StataReader`` and ``StataWriter`` where a file was not properly closed when an error was raised (:issue:`13940`)
 - Bug in ``pd.pivot_table()`` where ``margins_name`` is ignored when ``aggfunc`` is a list (:issue:`13354`)
 - Bug in ``pd.Series.str.zfill``, ``center``, ``ljust``, ``rjust``, and ``pad`` when passing non-integers, did not raise ``TypeError`` (:issue:`13598`)
 - Bug in checking for any null objects in a ``TimedeltaIndex``, which always returned ``True`` (:issue:`13603`)
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -327,7 +327,9 @@ def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
 
     if memory_map and hasattr(f, 'fileno'):
         try:
-            f = MMapWrapper(f)
+            g = MMapWrapper(f)
+            f.close()
+            f = g
         except Exception:
             # we catch any errors that may have occurred
             # because that is consistent with the lower-level
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -393,11 +393,15 @@ def _read(filepath_or_buffer, kwds):
         raise NotImplementedError("'nrows' and 'chunksize' cannot be used"
                                   " together yet.")
     elif nrows is not None:
-        return parser.read(nrows)
+        data = parser.read(nrows)
+        parser.close()
+        return data
     elif chunksize or iterator:
         return parser
 
-    return parser.read()
+    data = parser.read()
+    parser.close()
+    return data
 
 _parser_defaults = {
     'delimiter': None,
@@ -727,10 +731,7 @@ def __init__(self, f, engine=None, **kwds):
         self._make_engine(self.engine)
 
     def close(self):
-        try:
-            self._engine._reader.close()
-        except:
-            pass
+        self._engine.close()
 
     def _get_options_with_defaults(self, engine):
         kwds = self.orig_options
@@ -1057,8 +1058,13 @@ def __init__(self, kwds):
 
         self._first_chunk = True
 
+        # GH 13932
+        # keep references to file handles opened by the parser itself
+        self.handles = []
+
     def close(self):
-        self._reader.close()
+        for f in self.handles:
+            f.close()
 
     @property
     def _has_complex_date_col(self):
@@ -1356,6 +1362,7 @@ def __init__(self, src, **kwds):
         if 'utf-16' in (kwds.get('encoding') or ''):
             if isinstance(src, compat.string_types):
                 src = open(src, 'rb')
+                self.handles.append(src)
             src = UTF8Recoder(src, kwds['encoding'])
             kwds['encoding'] = 'utf-8'
 
@@ -1760,6 +1767,7 @@ def __init__(self, f, **kwds):
             f = TextIOWrapper(f, encoding=self.encoding)
 
         # Set self.data to something that can read lines.
+        self.handles.append(f)
         if hasattr(f, 'readline'):
             self._make_reader(f)
         else:
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
@@ -1580,5 +1580,6 @@ def test_temporary_file(self):
         new_file.seek(0)
 
         result = self.read_csv(new_file, sep='\s+', header=None)
+        new_file.close()
         expected = DataFrame([[0, 0]])
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py
@@ -130,7 +130,8 @@ def test_decompression_regex_sep(self):
         except ImportError:
             raise nose.SkipTest('need gzip and bz2 to run')
 
-        data = open(self.csv1, 'rb').read()
+        with open(self.csv1, 'rb') as f:
+            data = f.read()
         data = data.replace(b',', b'::')
         expected = self.read_csv(self.csv1)
 
diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/io/tests/parser/test_textreader.py
@@ -54,7 +54,8 @@ def test_file_handle_mmap(self):
             f.close()
 
     def test_StringIO(self):
-        text = open(self.csv1, 'rb').read()
+        with open(self.csv1, 'rb') as f:
+            text = f.read()
         src = BytesIO(text)
         reader = TextReader(src, header=None)
         reader.read()