From 8eb547ec0bd24b5b9773864e3dee76c1e47db286 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 10:12:40 -0700 Subject: [PATCH 01/10] CI: avoid file leaks in sas_xport tests --- pandas/tests/io/sas/test_xport.py | 7 +++++++ pandas/util/_test_decorators.py | 21 ++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index 2682bafedb8f1..4dba16e88c437 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm @@ -30,6 +32,11 @@ def setup_method(self, datapath): self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") + with td.file_leak_context(): + yield + + self.file02b.close() + def test1_basic(self): # Tests with DEMO_G.xpt (all numeric file) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index bdf633839b2cd..e617fe426cfdf 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,6 +23,7 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ +from contextlib import contextmanager from distutils.version import LooseVersion from functools import wraps import locale @@ -237,7 +238,7 @@ def documented_fixture(fixture): def check_file_leaks(func) -> Callable: """ - Decorate a test function tot check that we are not leaking file descriptors. + Decorate a test function to check that we are not leaking file descriptors. """ psutil = safe_import("psutil") if not psutil: @@ -256,6 +257,24 @@ def new_func(*args, **kwargs): return new_func +@contextmanager +def file_leak_context(): + """ + ContextManager analogue to check_file_leaks. + """ + psutil = safe_import("psutil") + if not psutil: + yield + else: + proc = psutil.Process() + flist = proc.open_files() + + yield + + flist2 = proc.open_files() + assert flist2 == flist, (flist2, flist) + + def async_mark(): try: import_optional_dependency("pytest_asyncio") From bd953bf9dc3e5b9c44e5e96bab3582dff31d2d86 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 11:01:57 -0700 Subject: [PATCH 02/10] CLN: reuse context inside decorator --- pandas/util/_test_decorators.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index e617fe426cfdf..08c84528fcc87 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -25,7 +25,6 @@ def test_foo(): """ from contextlib import contextmanager from distutils.version import LooseVersion -from functools import wraps import locale from typing import Callable, Optional @@ -240,22 +239,9 @@ def check_file_leaks(func) -> Callable: """ Decorate a test function to check that we are not leaking file descriptors. """ - psutil = safe_import("psutil") - if not psutil: + with file_leak_context(): return func - @wraps(func) - def new_func(*args, **kwargs): - proc = psutil.Process() - flist = proc.open_files() - - func(*args, **kwargs) - - flist2 = proc.open_files() - assert flist2 == flist - - return new_func - @contextmanager def file_leak_context(): From c11444971a5ca2b4f1f237f568c663a47247068b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 14:50:21 -0700 Subject: [PATCH 03/10] TST: check for leaked socket connections --- pandas/util/_test_decorators.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 08c84528fcc87..c56d0ad3e9472 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -254,12 +254,16 @@ def file_leak_context(): else: proc = psutil.Process() flist = proc.open_files() + conns = proc.connections() yield flist2 = proc.open_files() assert flist2 == flist, (flist2, flist) + conns2 = proc.connections() + assert conns2 == conns, (conns2, conns) + def async_mark(): try: From c2a362c89fb489e4af90423e8661d876044a4a00 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 16:13:52 -0700 Subject: [PATCH 04/10] open file later --- pandas/tests/io/sas/test_xport.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index 4dba16e88c437..ae2022e5cabbe 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -28,15 +28,12 @@ def setup_method(self, datapath): self.dirpath = datapath("io", "sas", "data") self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") - self.file02b = open(os.path.join(self.dirpath, "SSHSV1_A.xpt"), "rb") self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") with td.file_leak_context(): yield - self.file02b.close() - def test1_basic(self): # Tests with DEMO_G.xpt (all numeric file) @@ -134,7 +131,8 @@ def test2_binary(self): data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv")) numeric_as_float(data_csv) - data = read_sas(self.file02b, format="xport") + with open(self.file02, "rb") as fd: + data = read_sas(fd, format="xport") tm.assert_frame_equal(data, data_csv) def test_multiple_types(self): From e75a15bfe9b3b0b458c127f3d80f4787559abc0f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 Aug 2020 10:31:08 -0700 Subject: [PATCH 05/10] Fix incorrect file closing in read_sas --- pandas/io/sas/sasreader.py | 10 ++++++++-- pandas/tests/io/sas/test_xport.py | 6 +++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 291c9d1ee7f0c..fffdebda8c87a 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -6,7 +6,7 @@ from pandas._typing import FilePathOrBuffer, Label -from pandas.io.common import stringify_path +from pandas.io.common import get_filepath_or_buffer, stringify_path if TYPE_CHECKING: from pandas import DataFrame # noqa: F401 @@ -109,6 +109,10 @@ def read_sas( else: raise ValueError("unable to infer format of SAS file") + filepath_or_buffer, _, _, should_close = get_filepath_or_buffer( + filepath_or_buffer, encoding + ) + reader: ReaderBase if format.lower() == "xport": from pandas.io.sas.sas_xport import XportReader @@ -129,5 +133,7 @@ def read_sas( return reader data = reader.read() - reader.close() + + if should_close: + reader.close() return data diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index ae2022e5cabbe..939edb3d8e0b4 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -132,7 +132,11 @@ def test2_binary(self): numeric_as_float(data_csv) with open(self.file02, "rb") as fd: - data = read_sas(fd, format="xport") + with td.file_leak_context(): + # GH#35693 ensure that if we pass an open file, we + # dont incorrectly close it in read_sas + data = read_sas(fd, format="xport") + tm.assert_frame_equal(data, data_csv) def test_multiple_types(self): From f337be41f12a864b16413ea5437686a05029f69a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 16:10:58 -0700 Subject: [PATCH 06/10] TST: make check_file_leaks an auto-use fixture for all tests --- pandas/conftest.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 97cc514e31bb3..baca786801685 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -157,6 +157,15 @@ def add_imports(doctest_namespace): doctest_namespace["pd"] = pd +@pytest.fixture(autouse=True) +def check_file_leaks(): + """ + Check that a test does not leak file handles. + """ + with td.file_leak_context(): + yield + + # ---------------------------------------------------------------- # Common arguments # ---------------------------------------------------------------- From 430567b23f413a73ba641886049c95a37457107b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Aug 2020 10:38:29 -0700 Subject: [PATCH 07/10] BUG: unclosed file handle in mmap --- pandas/io/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/common.py b/pandas/io/common.py index 54f35e689aac8..5ccd992b10704 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -535,6 +535,8 @@ def get_handle( try: wrapped = _MMapWrapper(f) f.close() + handles.remove(f) + handles.append(wrapped) f = wrapped except Exception: # we catch any errors that may have occurred From 1c3bf7351446cb7cc6f24858011ffedc51ba77f2 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Aug 2020 16:21:53 -0700 Subject: [PATCH 08/10] TST: close connections opened by sqlalchemy --- pandas/tests/io/test_sql.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 29b787d39c09d..a7e3162ed7b73 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -263,7 +263,8 @@ def _get_all_tables(self): return table_list def _close_conn(self): - pass + # https://docs.sqlalchemy.org/en/13/core/connections.html#engine-disposal + self.conn.dispose() class PandasSQLTest: @@ -1242,7 +1243,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): def setup_class(cls): cls.setup_import() cls.setup_driver() - conn = cls.connect() + conn = cls.conn = cls.connect() conn.connect() def load_test_data_and_sql(self): From 415294c26eb93202bc6a284de185949610a937b3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 Aug 2020 08:50:43 -0700 Subject: [PATCH 09/10] Move fixture to tests.io.conftest --- pandas/conftest.py | 9 --------- pandas/io/common.py | 3 ++- pandas/tests/io/conftest.py | 11 +++++++++++ pandas/tests/io/excel/test_readers.py | 2 -- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index baca786801685..97cc514e31bb3 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -157,15 +157,6 @@ def add_imports(doctest_namespace): doctest_namespace["pd"] = pd -@pytest.fixture(autouse=True) -def check_file_leaks(): - """ - Check that a test does not leak file handles. - """ - with td.file_leak_context(): - yield - - # ---------------------------------------------------------------- # Common arguments # ---------------------------------------------------------------- diff --git a/pandas/io/common.py b/pandas/io/common.py index 5ccd992b10704..d1305c9cabe0e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -18,6 +18,7 @@ Optional, Tuple, Type, + Union, ) from urllib.parse import ( urljoin, @@ -452,7 +453,7 @@ def get_handle( except ImportError: need_text_wrapping = (BufferedIOBase, RawIOBase) - handles: List[IO] = list() + handles: List[Union[IO, _MMapWrapper]] = list() f = path_or_buf # Convert pathlib.Path/py.path.local or string diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index fcee25c258efa..39b9a1cd4fa28 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -2,11 +2,22 @@ import pytest +import pandas.util._test_decorators as td + import pandas._testing as tm from pandas.io.parsers import read_csv +@pytest.fixture(autouse=True) +def check_file_leaks(): + """ + Check that a test does not leak file handles. + """ + with td.file_leak_context(): + yield + + @pytest.fixture def tips_file(datapath): """Path to the tips dataset""" diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 51fbbf836a03f..49415a7e0a126 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -650,7 +650,6 @@ def test_read_from_pathlib_path(self, read_ext): tm.assert_frame_equal(expected, actual) @td.skip_if_no("py.path") - @td.check_file_leaks def test_read_from_py_localpath(self, read_ext): # GH12655 @@ -664,7 +663,6 @@ def test_read_from_py_localpath(self, read_ext): tm.assert_frame_equal(expected, actual) - @td.check_file_leaks def test_close_from_py_localpath(self, read_ext): # GH31467 From 13f75406392eda23104e5c73cceb51a05a7b86d1 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 18 Aug 2020 10:19:27 -0700 Subject: [PATCH 10/10] Avoid sqlite file leak in ipython test --- pandas/conftest.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 97cc514e31bb3..8d3c9d4dac400 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1181,7 +1181,13 @@ def ip(): pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.interactiveshell import InteractiveShell - return InteractiveShell() + # GH#35711 make sure sqlite history file handle is not leaked + from traitlets.config import Config + + c = Config() + c.HistoryManager.hist_file = ":memory:" + + return InteractiveShell(config=c) @pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"])