pandas-dev · twoertwein · Feb 13, 2021 · Mar 31, 2021 · jbrockmendel · Jan 26, 2021
diff --git a/ci/deps/actions-37-db.yaml b/ci/deps/actions-37-db.yaml
@@ -51,4 +51,4 @@ dependencies:
     - brotlipy
     - coverage
     - pandas-datareader
-    - pyxlsb
+    - pyxlsb>=1.0.8
diff --git a/ci/deps/actions-37-minimum_versions.yaml b/ci/deps/actions-37-minimum_versions.yaml
@@ -24,6 +24,7 @@ dependencies:
   - python-dateutil=2.7.3
   - pytz=2017.3
   - pyarrow=0.15
+  - pyxlsb>=1.0.8
   - scipy=1.2
   - xlrd=1.2.0
   - xlsxwriter=1.0.2

diff --git a/ci/deps/actions-38-locale.yaml b/ci/deps/actions-38-locale.yaml
@@ -38,4 +38,4 @@ dependencies:
   - pyarrow=1.0.0
   - pip
   - pip:
-    - pyxlsb
+    - pyxlsb>=1.0.8
diff --git a/ci/deps/azure-macos-37.yaml b/ci/deps/azure-macos-37.yaml
@@ -33,4 +33,4 @@ dependencies:
   - pip:
     - cython>=0.29.21
     - pyreadstat
-    - pyxlsb
+    - pyxlsb>=1.0.8
diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml
@@ -39,4 +39,4 @@ dependencies:
   - pyreadstat
   - pip
   - pip:
-    - pyxlsb
+    - pyxlsb>=1.0.8
diff --git a/ci/run_tests.sh b/ci/run_tests.sh
@@ -1,5 +1,7 @@
 #!/bin/bash -e
 
+PYTEST_WORKERS=0
+
 # Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set)
 # https://github.com/pytest-dev/pytest/issues/920
 # https://github.com/pytest-dev/pytest/issues/1075
@@ -19,7 +21,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then
     XVFB="xvfb-run "
 fi
 
-PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
+PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n 0 --dist=no -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
 
 if [[ $(uname) != "Linux"  && $(uname) != "Darwin" ]]; then
     # GH#37455 windows py38 build appears to be running out of memory
@@ -30,7 +32,7 @@ fi
 echo $PYTEST_CMD
 sh -c "$PYTEST_CMD"
 
-PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -m \"$PATTERN and arraymanager\" -n $PYTEST_WORKERS  --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
+PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -m \"$PATTERN and arraymanager\" -n 0  --dist=no -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
 
 echo $PYTEST_AM_CMD
 sh -c "$PYTEST_AM_CMD"
diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -275,6 +275,17 @@ Dependency                Minimum Version    Notes
 SciPy                     1.12.0             Miscellaneous statistical functions
 numba                     0.46.0             Alternative execution engine for rolling operations
                                              (see :ref:`Enhancing Performance <enhancingperf.numba>`)
+openpyxl                  2.6.0              Reading / writing for xlsx files
+pandas-gbq                0.12.0             Google Big Query access
+psycopg2                  2.7                PostgreSQL engine for sqlalchemy
+pyarrow                   0.15.0             Parquet, ORC, and feather reading / writing
+pymysql                   0.8.1              MySQL engine for sqlalchemy
+pyreadstat                                   SPSS files (.sav) reading
+pyxlsb                    1.0.8              Reading for xlsb files
+qtpy                                         Clipboard I/O
+s3fs                      0.4.0              Amazon S3 access
+tabulate                  0.8.7              Printing in Markdown-friendly format (see `tabulate`_)
+>>>>>>> TST: extend check for leaked files
 xarray                    0.12.3             pandas-like API for N-dimensional data
 ========================= ================== =============================================================
 

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -431,6 +431,8 @@ Optional libraries below the lowest tested version may still work, but are not c
 +-----------------+-----------------+---------+
 | pytables        | 3.5.1           |         |
 +-----------------+-----------------+---------+
+| pyxlsb          | 1.0.8           |    X    |
++-----------------+-----------------+---------+
 | s3fs            | 0.4.0           |         |
 +-----------------+-----------------+---------+
 | scipy           | 1.2.0           |         |

diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
@@ -21,7 +21,7 @@
     "pandas_gbq": "0.12.0",
     "pyarrow": "0.15.0",
     "pytest": "5.0.1",
-    "pyxlsb": "1.0.6",
+    "pyxlsb": "1.0.8",
     "s3fs": "0.4.0",
     "scipy": "1.2.0",
     "sqlalchemy": "1.2.8",

diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
@@ -269,7 +269,7 @@ def _check_f(base, f):
         _check_f(d.copy(), f)
 
     @async_mark()
-    @td.check_file_leaks
+    @td.check_file_leaks()
     async def test_tab_complete_warning(self, ip, frame_or_series):
         # GH 16409
         pytest.importorskip("IPython", minversion="6.0.0")

diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
@@ -6,6 +6,8 @@
 
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas._testing as tm
 
 from pandas.io.parsers import read_csv
@@ -163,3 +165,17 @@ def add_tips_files(bucket_name):
     while cli.list_buckets()["Buckets"] and timeout > 0:
         time.sleep(0.1)
         timeout -= 0.1
+
+
+@pytest.fixture(autouse=True)
+def check_for_file_leaks(request):
+    """
+    Fixture to run around every test to ensure that we are not leaking files.
+
+    See also
+    --------
+    _test_decorators.check_file_leaks
+    """
+    # GH#30162
+    with td.check_file_leaks(ignore_connections=True):
+        yield
diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py
@@ -43,23 +43,9 @@ def read_ext(request):
     return request.param
 
 
+# override auto-fixture to also check connections
 @pytest.fixture(autouse=True)
-def check_for_file_leaks():
-    """
-    Fixture to run around every test to ensure that we are not leaking files.
-
-    See also
-    --------
-    _test_decorators.check_file_leaks
-    """
+def check_for_file_leaks(request):
     # GH#30162
-    psutil = td.safe_import("psutil")
-    if not psutil:
-        yield
-
-    else:
-        proc = psutil.Process()
-        flist = proc.open_files()
+    with td.check_file_leaks(ignore_connections=False):
         yield
-        flist2 = proc.open_files()
-        assert flist == flist2
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -797,7 +797,6 @@ def test_read_from_pathlib_path(self, read_ext):
         tm.assert_frame_equal(expected, actual)
 
     @td.skip_if_no("py.path")
-    @td.check_file_leaks
     def test_read_from_py_localpath(self, read_ext):
 
         # GH12655
@@ -811,7 +810,6 @@ def test_read_from_py_localpath(self, read_ext):
 
         tm.assert_frame_equal(expected, actual)
 
-    @td.check_file_leaks
     def test_close_from_py_localpath(self, read_ext):
 
         # GH31467

diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -3266,7 +3266,7 @@ def test_format_percentiles_integer_idx():
     assert result == expected
 
 
-@td.check_file_leaks
+@td.check_file_leaks()
 def test_repr_html_ipython_config(ip):
     code = textwrap.dedent(
         """\

diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -417,15 +417,12 @@ def test_file_descriptor_leak(all_parsers):
 
     parser = all_parsers
     with tm.ensure_clean() as path:
-
-        def test():
+        with td.check_file_leaks():
             with pytest.raises(EmptyDataError, match="No columns to parse from file"):
                 parser.read_csv(path)
 
-        td.check_file_leaks(test)()
-
 
-@td.check_file_leaks
+@td.check_file_leaks()
 def test_memory_map(all_parsers, csv_dir_path):
     mmap_file = os.path.join(csv_dir_path, "test_mmap.csv")
     parser = all_parsers

diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py
@@ -2,6 +2,7 @@
 Tests that work on both the Python and C engines but do not have a
 specific classification into the other test modules.
 """
+
 import codecs
 import csv
 from io import StringIO
@@ -217,7 +218,7 @@ def test_null_byte_char(all_parsers):
             parser.read_csv(StringIO(data), names=names)
 
 
-@td.check_file_leaks
+@td.check_file_leaks()
 def test_open_file(all_parsers):
     # GH 39024
     parser = all_parsers

diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py
@@ -31,7 +31,7 @@ def setup_method(self, datapath):
         self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt")
         self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt")
 
-        with td.file_leak_context():
+        with td.check_file_leaks():
             yield
 
     def test1_basic(self):
@@ -129,10 +129,9 @@ def test2_binary(self):
         numeric_as_float(data_csv)
 
         with open(self.file02, "rb") as fd:
-            with td.file_leak_context():
-                # GH#35693 ensure that if we pass an open file, we
-                #  dont incorrectly close it in read_sas
-                data = read_sas(fd, format="xport")
+            # GH#35693 ensure that if we pass an open file, we
+            #  dont incorrectly close it in read_sas
+            data = read_sas(fd, format="xport")
 
         tm.assert_frame_equal(data, data_csv)
 

diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -524,3 +524,13 @@ def test_bad_encdoing_errors():
     with tm.ensure_clean() as path:
         with pytest.raises(ValueError, match="Invalid value for `encoding_errors`"):
             icom.get_handle(path, "w", errors="bad")
+
+
+def test_resource_warnings():
+    msg = '[ResourceWarning("unclosed file *)]'
+    with tm.ensure_clean("_resource_test") as path:
+        with pytest.raises(AssertionError, match=msg):
+            with td.check_file_leaks():
+                handle = open(path)
+        # prevent the auto fixture from throwing an AssertionError
+        handle.close()
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
@@ -23,7 +23,7 @@
 
 
 @async_mark()
-@td.check_file_leaks
+@td.check_file_leaks()
 async def test_tab_complete_ipython6_warning(ip):
     from IPython.core.completer import provisionalcompleter
 

diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py
@@ -23,14 +23,11 @@ def test_foo():
 
 For more information, refer to the ``pytest`` documentation on ``skipif``.
 """
-from contextlib import contextmanager
-from distutils.version import LooseVersion
 import locale
-from typing import (
-    Callable,
-    Optional,
-)
 import warnings
+from contextlib import ContextDecorator
+from distutils.version import LooseVersion
+from typing import Callable, Optional
 
 import numpy as np
 import pytest
@@ -42,11 +39,7 @@ def test_foo():
     is_platform_windows,
 )
 from pandas.compat._optional import import_optional_dependency
-
-from pandas.core.computation.expressions import (
-    NUMEXPR_INSTALLED,
-    USE_NUMEXPR,
-)
+from pandas.core.computation.expressions import NUMEXPR_INSTALLED, USE_NUMEXPR
 
 
 def safe_import(mod_name: str, min_version: Optional[str] = None):
@@ -245,38 +238,67 @@ def documented_fixture(fixture):
     return documented_fixture
 
 
-def check_file_leaks(func) -> Callable:
-    """
-    Decorate a test function to check that we are not leaking file descriptors.
+class check_file_leaks(ContextDecorator):
     """
-    with file_leak_context():
-        return func
+    Use psutil and ResourceWarning to identify forgotten resources.
 
-
-@contextmanager
-def file_leak_context():
-    """
-    ContextManager analogue to check_file_leaks.
+    ResourceWarnings that contain the string 'ssl' are ignored as they are very likely
+    caused by boto3 (GH#17058).
     """
-    psutil = safe_import("psutil")
-    if not psutil:
-        yield
-    else:
-        proc = psutil.Process()
-        flist = proc.open_files()
-        conns = proc.connections()
-
-        yield
-
-        flist2 = proc.open_files()
-        # on some builds open_files includes file position, which we _dont_
-        #  expect to remain unchanged, so we need to compare excluding that
-        flist_ex = [(x.path, x.fd) for x in flist]
-        flist2_ex = [(x.path, x.fd) for x in flist2]
-        assert flist2_ex == flist_ex, (flist2, flist)
 
-        conns2 = proc.connections()
-        assert conns2 == conns, (conns2, conns)
+    def __init__(self, ignore_connections: bool = False):
+        super().__init__()
+        self.ignore_connections = ignore_connections
+
+    def __enter__(self):
+        # catch warnings
+        self.catcher = warnings.catch_warnings(record=True)
+        self.record = self.catcher.__enter__()
+
+        # get files and connections
+        self.psutil = safe_import("psutil")
+        if self.psutil:
+            self.proc = self.psutil.Process()
+            self.flist = self.proc.open_files()
+            self.conns = self.proc.connections()
+
+        return self
+
+    def __exit__(self, *exc):
+        self.catcher.__exit__(*exc)
+
+        # re-throw warnings
+        fields = ("category", "source", "filename", "lineno")
+        for message in self.record:
+            warnings.warn_explicit(
+                message.message, **{field: getattr(message, field) for field in fields}
+            )
+
+        # assert no non-ssl ResourceWarnings
+        messages = [
+            warn.message
+            for warn in self.record
+            if issubclass(warn.category, ResourceWarning)
+            and "ssl" not in str(warn.message)
+        ]
+        assert not messages, f"{messages}"
+
+        # psutil
+        if self.psutil:
+            flist2 = self.proc.open_files()
+
+            # on some builds open_files includes file position, which we _dont_
+            # expect to remain unchanged, so we need to compare excluding that
+            flist_ex = {(x.path, x.fd) for x in self.flist}
+            flist2_ex = {(x.path, x.fd) for x in flist2}
+            assert (
+                flist2_ex == flist_ex
+            ), f"{flist_ex - flist2_ex} {flist2_ex - flist_ex}"
+
+            if not self.ignore_connections:
+                conns = set(self.conns)
+                conns2 = set(self.proc.connections())
+                assert conns2 == conns, f"{conns - conns2} {conns2 - conns}"
 
 
 def async_mark():

diff --git a/setup.cfg b/setup.cfg
@@ -118,6 +118,7 @@ xfail_strict = True
 filterwarnings =
     error:Sparse:FutureWarning
     error:The SparseArray:FutureWarning
+    always::ResourceWarning
 junit_family = xunit2
 
 [codespell]
-Original file line number
+Diff line change
@@ Expand Up / @@ -38,4 +38,4 @@ dependencies: @@
       - pyarrow=1.0.0
       - pip
       - pip:
-        - pyxlsb
+        - pyxlsb>=1.0.8