TST: remove ResourceWarnings from stata by auto-closing iterator

jreback · jreback · commit 6f1ade19601c · 2016-04-13T15:48:54.000-04:00
TST: install scipy wheels from master on 3.5/dev, xref #12887; TST: skip writing excel if no writers installed
diff --git a/ci/install-3.5_NUMPY_DEV.sh b/ci/install-3.5_NUMPY_DEV.sh
@@ -15,6 +15,6 @@ pip uninstall numpy -y
 time sudo apt-get $APT_ARGS install libatlas-base-dev gfortran
 
 # install numpy wheel from master
-pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy
+pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy scipy
 
 true
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -251,6 +251,8 @@ def set_function_name(f, name, cls):
         f.__module__ = cls.__module__
         return f
 
+    ResourceWarning = ResourceWarning
+
 else:
     string_types = basestring,
     integer_types = (int, long)
@@ -298,6 +300,8 @@ def set_function_name(f, name, cls):
         f.__name__ = name
         return f
 
+    class ResourceWarning(Warning): pass
+
 string_and_binary_types = string_types + (binary_type,)
 
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -428,7 +428,7 @@ def _get_handle(path, mode, encoding=None, compression=None):
     return f
 
 
-class UTF8Recoder:
+class UTF8Recoder(BaseIterator):
 
     """
     Iterator that reads an encoded stream and reencodes the input to UTF-8
@@ -437,9 +437,6 @@ class UTF8Recoder:
     def __init__(self, f, encoding):
         self.reader = codecs.getreader(encoding)(f)
 
-    def __iter__(self):
-        return self
-
     def read(self, bytes=-1):
         return self.reader.read(bytes).encode("utf-8")
 
@@ -449,9 +446,6 @@ def readline(self):
     def next(self):
         return next(self.reader).encode("utf-8")
 
-    # Python 3 iterator
-    __next__ = next
-
 
 if compat.PY3:  # pragma: no cover
     def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -623,6 +623,12 @@ def __init__(self, f, engine=None, **kwds):
 
         self._make_engine(self.engine)
 
+    def close(self):
+        try:
+            self._engine._reader.close()
+        except:
+            pass
+
     def _get_options_with_defaults(self, engine):
         kwds = self.orig_options
 
@@ -875,6 +881,9 @@ def __init__(self, kwds):
 
         self._first_chunk = True
 
+    def close(self):
+        self._reader.close()
+
     @property
     def _has_complex_date_col(self):
         return (isinstance(self.parse_dates, dict) or
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -161,9 +161,15 @@ def read_stata(filepath_or_buffer, convert_dates=True,
                          chunksize=chunksize, encoding=encoding)
 
     if iterator or chunksize:
-        return reader
-
-    return reader.read()
+        try:
+            return reader
+        except StopIteration:
+            reader.close()
+
+    try:
+        return reader.read()
+    finally:
+        reader.close()
 
 _date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"]
 
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -429,6 +429,9 @@ def test_read_one_empty_col_no_header(self):
         tm.assert_frame_equal(actual_header_zero, expected)
 
     def test_read_one_empty_col_with_header(self):
+        _skip_if_no_xlwt()
+        _skip_if_no_openpyxl()
+
         df = pd.DataFrame(
             [["", 1, 100],
              ["", 2, 200],
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -2071,6 +2071,7 @@ def test_utf16_bom_skiprows(self):
                                            sep=sep)
                     expected = self.read_csv(s, encoding='utf-8', skiprows=2,
                                              sep=sep)
+                    s.close()
 
                     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py
@@ -1024,28 +1024,29 @@ def test_read_chunks_117(self):
                                 check_datetimelike_compat=True)
 
                             pos += chunksize
+                        itr.close()
 
     def test_iterator(self):
 
         fname = self.dta3_117
 
         parsed = read_stata(fname)
 
-        itr = read_stata(fname, iterator=True)
-        chunk = itr.read(5)
-        tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+        with read_stata(fname, iterator=True) as itr:
+            chunk = itr.read(5)
+            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
 
-        itr = read_stata(fname, chunksize=5)
-        chunk = list(itr)
-        tm.assert_frame_equal(parsed.iloc[0:5, :], chunk[0])
+        with read_stata(fname, chunksize=5) as itr:
+            chunk = list(itr)
+            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk[0])
 
-        itr = read_stata(fname, iterator=True)
-        chunk = itr.get_chunk(5)
-        tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+        with read_stata(fname, iterator=True) as itr:
+            chunk = itr.get_chunk(5)
+            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
 
-        itr = read_stata(fname, chunksize=5)
-        chunk = itr.get_chunk()
-        tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+        with read_stata(fname, chunksize=5) as itr:
+            chunk = itr.get_chunk()
+            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
 
         # GH12153
         from_chunks = pd.concat(read_stata(fname, chunksize=4))
@@ -1089,22 +1090,23 @@ def test_read_chunks_115(self):
                                 check_datetimelike_compat=True)
 
                             pos += chunksize
+                        itr.close()
 
     def test_read_chunks_columns(self):
         fname = self.dta3_117
         columns = ['quarter', 'cpi', 'm1']
         chunksize = 2
 
         parsed = read_stata(fname, columns=columns)
-        itr = read_stata(fname, iterator=True)
-        pos = 0
-        for j in range(5):
-            chunk = itr.read(chunksize, columns=columns)
-            if chunk is None:
-                break
-            from_frame = parsed.iloc[pos:pos + chunksize, :]
-            tm.assert_frame_equal(from_frame, chunk, check_dtype=False)
-            pos += chunksize
+        with read_stata(fname, iterator=True) as itr:
+            pos = 0
+            for j in range(5):
+                chunk = itr.read(chunksize, columns=columns)
+                if chunk is None:
+                    break
+                from_frame = parsed.iloc[pos:pos + chunksize, :]
+                tm.assert_frame_equal(from_frame, chunk, check_dtype=False)
+                pos += chunksize
 
 
 if __name__ == '__main__':
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -258,6 +258,7 @@ cdef class TextReader:
         parser_t *parser
         object file_handle, na_fvalues
         object true_values, false_values
+        object dsource
         bint na_filter, verbose, has_usecols, has_mi_columns
         int parser_start
         list clocks
@@ -535,6 +536,15 @@ cdef class TextReader:
         kh_destroy_str(self.true_set)
         kh_destroy_str(self.false_set)
 
+    def close(self):
+        # we need to properly close an open derived
+        # filehandle here, e.g. a UTF8Recoder
+        if self.dsource is not None:
+            try:
+                self.dsource.close()
+            except:
+                pass
+
     def set_error_bad_lines(self, int status):
         self.parser.error_bad_lines = status
 
@@ -635,6 +645,8 @@ cdef class TextReader:
             raise IOError('Expected file path name or file-like object,'
                           ' got %s type' % type(source))
 
+        self.dsource = source
+
     cdef _get_header(self):
         # header is now a list of lists, so field_count should use header[0]
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -33,7 +33,8 @@
 import pandas.lib as lib
 from pandas.compat import(
     filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter,
-    raise_with_traceback, httplib, is_platform_windows, is_platform_32bit
+    raise_with_traceback, httplib, is_platform_windows, is_platform_32bit,
+    PY3
 )
 
 from pandas.computation import expressions as expr
@@ -51,18 +52,22 @@
 
 
 # set testing_mode
+_testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)
+
+
 def set_testing_mode():
     # set the testing mode filters
     testing_mode = os.environ.get('PANDAS_TESTING_MODE', 'None')
     if 'deprecate' in testing_mode:
-        warnings.simplefilter('always', DeprecationWarning)
+
+        warnings.simplefilter('always', _testing_mode_warnings)
 
 
 def reset_testing_mode():
     # reset the testing mode filters
     testing_mode = os.environ.get('PANDAS_TESTING_MODE', 'None')
     if 'deprecate' in testing_mode:
-        warnings.simplefilter('ignore', DeprecationWarning)
+        warnings.simplefilter('ignore', _testing_mode_warnings)
 
 set_testing_mode()
 
@@ -286,7 +291,7 @@ def _skip_if_no_dateutil():
 
 
 def _skip_if_windows_python_3():
-    if compat.PY3 and is_platform_windows():
+    if PY3 and is_platform_windows():
         import nose
         raise nose.SkipTest("not used on python 3/win32")
 
@@ -437,7 +442,7 @@ def get_locales(prefix=None, normalize=True,
         raw_locales = raw_locales.split(b'\n')
         out_locales = []
         for x in raw_locales:
-            if compat.PY3:
+            if PY3:
                 out_locales.append(str(
                     x, encoding=pd.options.display.encoding))
             else: