From 8eb547ec0bd24b5b9773864e3dee76c1e47db286 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 10:12:40 -0700 Subject: [PATCH 01/14] CI: avoid file leaks in sas_xport tests --- pandas/tests/io/sas/test_xport.py | 7 +++++++ pandas/util/_test_decorators.py | 21 ++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index 2682bafedb8f1..4dba16e88c437 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm @@ -30,6 +32,11 @@ def setup_method(self, datapath): self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") + with td.file_leak_context(): + yield + + self.file02b.close() + def test1_basic(self): # Tests with DEMO_G.xpt (all numeric file) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index bdf633839b2cd..e617fe426cfdf 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,6 +23,7 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ +from contextlib import contextmanager from distutils.version import LooseVersion from functools import wraps import locale @@ -237,7 +238,7 @@ def documented_fixture(fixture): def check_file_leaks(func) -> Callable: """ - Decorate a test function tot check that we are not leaking file descriptors. + Decorate a test function to check that we are not leaking file descriptors. """ psutil = safe_import("psutil") if not psutil: @@ -256,6 +257,24 @@ def new_func(*args, **kwargs): return new_func +@contextmanager +def file_leak_context(): + """ + ContextManager analogue to check_file_leaks. 
+ """ + psutil = safe_import("psutil") + if not psutil: + yield + else: + proc = psutil.Process() + flist = proc.open_files() + + yield + + flist2 = proc.open_files() + assert flist2 == flist, (flist2, flist) + + def async_mark(): try: import_optional_dependency("pytest_asyncio") From bd953bf9dc3e5b9c44e5e96bab3582dff31d2d86 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 11:01:57 -0700 Subject: [PATCH 02/14] CLN: reuse context inside decorator --- pandas/util/_test_decorators.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index e617fe426cfdf..08c84528fcc87 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -25,7 +25,6 @@ def test_foo(): """ from contextlib import contextmanager from distutils.version import LooseVersion -from functools import wraps import locale from typing import Callable, Optional @@ -240,22 +239,9 @@ def check_file_leaks(func) -> Callable: """ Decorate a test function to check that we are not leaking file descriptors. 
""" - psutil = safe_import("psutil") - if not psutil: + with file_leak_context(): return func - @wraps(func) - def new_func(*args, **kwargs): - proc = psutil.Process() - flist = proc.open_files() - - func(*args, **kwargs) - - flist2 = proc.open_files() - assert flist2 == flist - - return new_func - @contextmanager def file_leak_context(): From c11444971a5ca2b4f1f237f568c663a47247068b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 14:50:21 -0700 Subject: [PATCH 03/14] TST: check for leaked socket connections --- pandas/util/_test_decorators.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 08c84528fcc87..c56d0ad3e9472 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -254,12 +254,16 @@ def file_leak_context(): else: proc = psutil.Process() flist = proc.open_files() + conns = proc.connections() yield flist2 = proc.open_files() assert flist2 == flist, (flist2, flist) + conns2 = proc.connections() + assert conns2 == conns, (conns2, conns) + def async_mark(): try: From c2a362c89fb489e4af90423e8661d876044a4a00 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 16:13:52 -0700 Subject: [PATCH 04/14] open file later --- pandas/tests/io/sas/test_xport.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index 4dba16e88c437..ae2022e5cabbe 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -28,15 +28,12 @@ def setup_method(self, datapath): self.dirpath = datapath("io", "sas", "data") self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") - self.file02b = open(os.path.join(self.dirpath, "SSHSV1_A.xpt"), "rb") self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") with td.file_leak_context(): 
yield - self.file02b.close() - def test1_basic(self): # Tests with DEMO_G.xpt (all numeric file) @@ -134,7 +131,8 @@ def test2_binary(self): data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv")) numeric_as_float(data_csv) - data = read_sas(self.file02b, format="xport") + with open(self.file02, "rb") as fd: + data = read_sas(fd, format="xport") tm.assert_frame_equal(data, data_csv) def test_multiple_types(self): From e75a15bfe9b3b0b458c127f3d80f4787559abc0f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 Aug 2020 10:31:08 -0700 Subject: [PATCH 05/14] Fix incorrect file closing in read_sas --- pandas/io/sas/sasreader.py | 10 ++++++++-- pandas/tests/io/sas/test_xport.py | 6 +++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 291c9d1ee7f0c..fffdebda8c87a 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -6,7 +6,7 @@ from pandas._typing import FilePathOrBuffer, Label -from pandas.io.common import stringify_path +from pandas.io.common import get_filepath_or_buffer, stringify_path if TYPE_CHECKING: from pandas import DataFrame # noqa: F401 @@ -109,6 +109,10 @@ def read_sas( else: raise ValueError("unable to infer format of SAS file") + filepath_or_buffer, _, _, should_close = get_filepath_or_buffer( + filepath_or_buffer, encoding + ) + reader: ReaderBase if format.lower() == "xport": from pandas.io.sas.sas_xport import XportReader @@ -129,5 +133,7 @@ def read_sas( return reader data = reader.read() - reader.close() + + if should_close: + reader.close() return data diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index ae2022e5cabbe..939edb3d8e0b4 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -132,7 +132,11 @@ def test2_binary(self): numeric_as_float(data_csv) with open(self.file02, "rb") as fd: - data = read_sas(fd, format="xport") + with td.file_leak_context(): + # GH#35693 ensure 
that if we pass an open file, we + # don't incorrectly close it in read_sas + data = read_sas(fd, format="xport") + tm.assert_frame_equal(data, data_csv) def test_multiple_types(self): From f337be41f12a864b16413ea5437686a05029f69a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 16:10:58 -0700 Subject: [PATCH 06/14] TST: make check_file_leaks an auto-use fixture for all tests --- pandas/conftest.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 97cc514e31bb3..baca786801685 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -157,6 +157,15 @@ def add_imports(doctest_namespace): doctest_namespace["pd"] = pd +@pytest.fixture(autouse=True) +def check_file_leaks(): + """ + Check that a test does not leak file handles. + """ + with td.file_leak_context(): + yield + + # ---------------------------------------------------------------- # Common arguments # ---------------------------------------------------------------- From 430567b23f413a73ba641886049c95a37457107b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Aug 2020 10:38:29 -0700 Subject: [PATCH 07/14] BUG: unclosed file handle in mmap --- pandas/io/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/common.py b/pandas/io/common.py index 54f35e689aac8..5ccd992b10704 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -535,6 +535,8 @@ def get_handle( try: wrapped = _MMapWrapper(f) f.close() + handles.remove(f) + handles.append(wrapped) f = wrapped except Exception: # we catch any errors that may have occurred From 1c3bf7351446cb7cc6f24858011ffedc51ba77f2 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Aug 2020 16:21:53 -0700 Subject: [PATCH 08/14] TST: close connections opened by sqlalchemy --- pandas/tests/io/test_sql.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 29b787d39c09d..a7e3162ed7b73 100644 --- 
a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -263,7 +263,8 @@ def _get_all_tables(self): return table_list def _close_conn(self): - pass + # https://docs.sqlalchemy.org/en/13/core/connections.html#engine-disposal + self.conn.dispose() class PandasSQLTest: @@ -1242,7 +1243,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): def setup_class(cls): cls.setup_import() cls.setup_driver() - conn = cls.connect() + conn = cls.conn = cls.connect() conn.connect() def load_test_data_and_sql(self): From d4999208b6b78562e3f88189e058ba08058e21be Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 14 Aug 2020 12:50:20 -0700 Subject: [PATCH 09/14] CLN: remove unused variable (#35726) --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 0306d4de2fc73..966773b7c6982 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -318,7 +318,7 @@ def __repr__(self) -> str: def __iter__(self): window = self._get_window(win_type=None) - blocks, obj = self._create_blocks(self._selected_obj) + _, obj = self._create_blocks(self._selected_obj) index = self._get_window_indexer(window=window) start, end = index.get_window_bounds( From 239b0b3a0b7e804312cbdfec47c848361ae69d33 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Fri, 14 Aug 2020 16:59:09 -0400 Subject: [PATCH 10/14] agg with list of non-aggregating functions (#35723) --- doc/source/whatsnew/v1.1.1.rst | 1 + pandas/core/groupby/generic.py | 25 +++++++++++-------- pandas/core/groupby/groupby.py | 10 +++++--- .../tests/groupby/aggregate/test_aggregate.py | 13 ++++++++++ 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst index 85e2a335c55c6..565b4a014bd0c 100644 --- a/doc/source/whatsnew/v1.1.1.rst +++ b/doc/source/whatsnew/v1.1.1.rst @@ -26,6 
+26,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.reset_index` would raise a ``ValueError`` on empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`) - Fixed regression where :meth:`DataFrame.merge_asof` would raise a ``UnboundLocalError`` when ``left_index`` , ``right_index`` and ``tolerance`` were set (:issue:`35558`) - Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`) +- Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` where a list of functions would produce the wrong results if at least one of the functions did not aggregate. (:issue:`35490`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b7280a9f7db3c..b806d9856d20f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -322,11 +322,14 @@ def _aggregate_multiple_funcs(self, arg): # let higher level handle return results - output = self._wrap_aggregated_output(results) + output = self._wrap_aggregated_output(results, index=None) return self.obj._constructor_expanddim(output, columns=columns) + # TODO: index should not be Optional - see GH 35490 def _wrap_series_output( - self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index, + self, + output: Mapping[base.OutputKey, Union[Series, np.ndarray]], + index: Optional[Index], ) -> Union[Series, DataFrame]: """ Wraps the output of a SeriesGroupBy operation into the expected result. @@ -335,7 +338,7 @@ def _wrap_series_output( ---------- output : Mapping[base.OutputKey, Union[Series, np.ndarray]] Data to wrap. - index : pd.Index + index : pd.Index or None Index to apply to the output. 
Returns @@ -363,8 +366,11 @@ def _wrap_series_output( return result + # TODO: Remove index argument, use self.grouper.result_index, see GH 35490 def _wrap_aggregated_output( - self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]] + self, + output: Mapping[base.OutputKey, Union[Series, np.ndarray]], + index: Optional[Index], ) -> Union[Series, DataFrame]: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. @@ -383,9 +389,7 @@ def _wrap_aggregated_output( In the vast majority of cases output will only contain one element. The exception is operations that expand dimensions, like ohlc. """ - result = self._wrap_series_output( - output=output, index=self.grouper.result_index - ) + result = self._wrap_series_output(output=output, index=index) return self._reindex_output(result) def _wrap_transformed_output( @@ -1720,7 +1724,9 @@ def _insert_inaxis_grouper_inplace(self, result): result.insert(0, name, lev) def _wrap_aggregated_output( - self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]] + self, + output: Mapping[base.OutputKey, Union[Series, np.ndarray]], + index: Optional[Index], ) -> DataFrame: """ Wraps the output of DataFrameGroupBy aggregations into the expected result. 
@@ -1745,8 +1751,7 @@ def _wrap_aggregated_output( self._insert_inaxis_grouper_inplace(result) result = result._consolidate() else: - index = self.grouper.result_index - result.index = index + result.index = self.grouper.result_index if self.axis == 1: result = result.T diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4597afeeaddbf..0047877ef78ee 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -974,7 +974,9 @@ def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs): return self._wrap_transformed_output(output) - def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, np.ndarray]): + def _wrap_aggregated_output( + self, output: Mapping[base.OutputKey, np.ndarray], index: Optional[Index] + ): raise AbstractMethodError(self) def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]): @@ -1049,7 +1051,7 @@ def _cython_agg_general( if len(output) == 0: raise DataError("No numeric types to aggregate") - return self._wrap_aggregated_output(output) + return self._wrap_aggregated_output(output, index=self.grouper.result_index) def _python_agg_general( self, func, *args, engine="cython", engine_kwargs=None, **kwargs @@ -1102,7 +1104,7 @@ def _python_agg_general( output[key] = maybe_cast_result(values[mask], result) - return self._wrap_aggregated_output(output) + return self._wrap_aggregated_output(output, index=self.grouper.result_index) def _concat_objects(self, keys, values, not_indexed_same: bool = False): from pandas.core.reshape.concat import concat @@ -2534,7 +2536,7 @@ def _get_cythonized_result( raise TypeError(error_msg) if aggregate: - return self._wrap_aggregated_output(output) + return self._wrap_aggregated_output(output, index=self.grouper.result_index) else: return self._wrap_transformed_output(output) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 
40a20c8210052..ce9d4b892d775 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1061,3 +1061,16 @@ def test_groupby_get_by_index(): res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])}) expected = pd.DataFrame(dict(A=["S", "W"], B=[1.0, 2.0])).set_index("A") pd.testing.assert_frame_equal(res, expected) + + +def test_nonagg_agg(): + # GH 35490 - Single/Multiple agg of non-agg function give same results + # TODO: agg should raise for functions that don't aggregate + df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]}) + g = df.groupby("a") + + result = g.agg(["cumsum"]) + result.columns = result.columns.droplevel(-1) + expected = g.agg("cumsum") + + tm.assert_frame_equal(result, expected) From e0c8fe0098dfe717e8a223a7aea12003293fe413 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 14 Aug 2020 14:01:02 -0700 Subject: [PATCH 11/14] BLD: bump xlrd min version to 1.2.0 (#35728) --- ci/deps/azure-37-locale_slow.yaml | 2 +- ci/deps/azure-37-minimum_versions.yaml | 2 +- doc/source/getting_started/install.rst | 2 +- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/compat/_optional.py | 2 +- pandas/tests/io/excel/test_readers.py | 43 +++++++------------------- 6 files changed, 16 insertions(+), 37 deletions(-) diff --git a/ci/deps/azure-37-locale_slow.yaml b/ci/deps/azure-37-locale_slow.yaml index 3ccb66e09fe7e..8000f3e6b9a9c 100644 --- a/ci/deps/azure-37-locale_slow.yaml +++ b/ci/deps/azure-37-locale_slow.yaml @@ -24,7 +24,7 @@ dependencies: - pytz=2017.3 - scipy - sqlalchemy=1.2.8 - - xlrd=1.1.0 + - xlrd=1.2.0 - xlsxwriter=1.0.2 - xlwt=1.3.0 - html5lib=1.0.1 diff --git a/ci/deps/azure-37-minimum_versions.yaml b/ci/deps/azure-37-minimum_versions.yaml index 94cc5812bcc10..05b1957198bc4 100644 --- a/ci/deps/azure-37-minimum_versions.yaml +++ b/ci/deps/azure-37-minimum_versions.yaml @@ -25,7 +25,7 @@ dependencies: - pytz=2017.3 - pyarrow=0.15 - scipy=1.2 - - xlrd=1.1.0 + - xlrd=1.2.0 - 
xlsxwriter=1.0.2 - xlwt=1.3.0 - html5lib=1.0.1 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 7ab150394bf51..4c270117e079e 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -287,7 +287,7 @@ s3fs 0.4.0 Amazon S3 access tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_) xarray 0.12.0 pandas-like API for N-dimensional data xclip Clipboard I/O on linux -xlrd 1.1.0 Excel reading +xlrd 1.2.0 Excel reading xlwt 1.3.0 Excel writing xsel Clipboard I/O on linux zlib Compression for HDF5 diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a3bb6dfd86bd2..42f95d88d74ac 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -122,7 +122,7 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | xarray | 0.12.0 | X | +-----------------+-----------------+---------+ -| xlrd | 1.1.0 | | +| xlrd | 1.2.0 | X | +-----------------+-----------------+---------+ | xlsxwriter | 1.0.2 | X | +-----------------+-----------------+---------+ diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 6423064732def..81eac490fe5b9 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -27,7 +27,7 @@ "tables": "3.4.3", "tabulate": "0.8.3", "xarray": "0.8.2", - "xlrd": "1.1.0", + "xlrd": "1.2.0", "xlwt": "1.2.0", "xlsxwriter": "0.9.8", "numba": "0.46.0", diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index b610c5ec3a838..51fbbf836a03f 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1,9 +1,7 @@ -import contextlib from datetime import datetime, time from functools import partial import os from urllib.error import URLError -import warnings import numpy as np import pytest @@ -14,22 +12,6 @@ from pandas import DataFrame, 
Index, MultiIndex, Series import pandas._testing as tm - -@contextlib.contextmanager -def ignore_xlrd_time_clock_warning(): - """ - Context manager to ignore warnings raised by the xlrd library, - regarding the deprecation of `time.clock` in Python 3.7. - """ - with warnings.catch_warnings(): - warnings.filterwarnings( - action="ignore", - message="time.clock has been deprecated", - category=DeprecationWarning, - ) - yield - - read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"] engine_params = [ # Add any engines to test here @@ -134,21 +116,19 @@ def test_usecols_int(self, read_ext, df_ref): # usecols as int msg = "Passing an integer for `usecols`" with pytest.raises(ValueError, match=msg): - with ignore_xlrd_time_clock_warning(): - pd.read_excel( - "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=3 - ) + pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=3 + ) # usecols as int with pytest.raises(ValueError, match=msg): - with ignore_xlrd_time_clock_warning(): - pd.read_excel( - "test1" + read_ext, - sheet_name="Sheet2", - skiprows=[1], - index_col=0, - usecols=3, - ) + pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols=3, + ) def test_usecols_list(self, read_ext, df_ref): if pd.read_excel.keywords["engine"] == "pyxlsb": @@ -597,8 +577,7 @@ def test_sheet_name(self, read_ext, df_ref): df1 = pd.read_excel( filename + read_ext, sheet_name=sheet_name, index_col=0 ) # doc - with ignore_xlrd_time_clock_warning(): - df2 = pd.read_excel(filename + read_ext, index_col=0, sheet_name=sheet_name) + df2 = pd.read_excel(filename + read_ext, index_col=0, sheet_name=sheet_name) tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) From eab5d637579e3cd976b2ed68194e65cc04362b95 Mon Sep 17 00:00:00 2001 From: estasney Date: Fri, 14 Aug 2020 17:01:49 -0400 Subject: [PATCH 12/14] Fix broken link in cookbook.rst (#35729) --- 
doc/source/user_guide/cookbook.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 49487ac327e73..7542e1dc7df6f 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -765,7 +765,7 @@ Timeseries `__ `Aggregation and plotting time series -`__ +`__ Turn a matrix with hours in columns and days in rows into a continuous row sequence in the form of a time series. `How to rearrange a Python pandas DataFrame? From b61060875ceb950bfa087c437a8b255a49d9e47b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 15 Aug 2020 16:00:23 -0700 Subject: [PATCH 13/14] Revert parts of #35711 --- pandas/conftest.py | 9 --------- pandas/tests/io/parser/test_common.py | 1 + pandas/tests/io/test_sql.py | 5 ++--- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index baca786801685..97cc514e31bb3 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -157,15 +157,6 @@ def add_imports(doctest_namespace): doctest_namespace["pd"] = pd -@pytest.fixture(autouse=True) -def check_file_leaks(): - """ - Check that a test does not leak file handles. 
- """ - with td.file_leak_context(): - yield - - # ---------------------------------------------------------------- # Common arguments # ---------------------------------------------------------------- diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 3d5f6ae3a4af9..1d8d5a29686a4 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -1836,6 +1836,7 @@ def test_raise_on_no_columns(all_parsers, nrows): parser.read_csv(StringIO(data)) +@td.check_file_leaks def test_memory_map(all_parsers, csv_dir_path): mmap_file = os.path.join(csv_dir_path, "test_mmap.csv") parser = all_parsers diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a7e3162ed7b73..29b787d39c09d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -263,8 +263,7 @@ def _get_all_tables(self): return table_list def _close_conn(self): - # https://docs.sqlalchemy.org/en/13/core/connections.html#engine-disposal - self.conn.dispose() + pass class PandasSQLTest: @@ -1243,7 +1242,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): def setup_class(cls): cls.setup_import() cls.setup_driver() - conn = cls.conn = cls.connect() + conn = cls.connect() conn.connect() def load_test_data_and_sql(self): From 3be2f2177121a4786a48a8e6e10b565f00d9e4be Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 15 Aug 2020 16:54:01 -0700 Subject: [PATCH 14/14] mypy fixup --- pandas/io/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 5ccd992b10704..d1305c9cabe0e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -18,6 +18,7 @@ Optional, Tuple, Type, + Union, ) from urllib.parse import ( urljoin, @@ -452,7 +453,7 @@ def get_handle( except ImportError: need_text_wrapping = (BufferedIOBase, RawIOBase) - handles: List[IO] = list() + handles: List[Union[IO, _MMapWrapper]] = list() f = path_or_buf # Convert 
pathlib.Path/py.path.local or string