From e184aa41e6acace44413719bdf9e96fc535af207 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Dec 2021 18:13:07 -0800 Subject: [PATCH 1/4] ENH: IntervalArray.min/max --- doc/source/whatsnew/v1.4.0.rst | 2 + pandas/core/arrays/base.py | 5 +- pandas/core/arrays/interval.py | 30 ++++++++++++ pandas/tests/arrays/interval/test_interval.py | 47 +++++++++++++++++++ pandas/tests/extension/test_interval.py | 25 ++++++++-- pandas/tests/series/test_ufunc.py | 8 ---- 6 files changed, 104 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 4a4e7dd6d15d7..34e882efbf428 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -218,6 +218,8 @@ Other enhancements ``USFederalHolidayCalendar``. See also `Other API changes`_. - :meth:`.Rolling.var`, :meth:`.Expanding.var`, :meth:`.Rolling.std`, :meth:`.Expanding.std` now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`44461`) - :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`) +- Implemented :meth:`IntervalArray.min`, :meth:`IntervalArray.max`, as a result of which ``min`` and ``max`` now work for :class:`IntervalIndex`, :class:`Series` and :class:`DataFrame` with ``IntervalDtype`` (:issue:`??`) +- .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 2665a65b06593..682ba0ccd11cc 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1352,7 +1352,10 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): ------ TypeError : subclass does not define reductions """ - raise TypeError(f"cannot perform {name} with type {self.dtype}") + meth = getattr(self, name, None) + if meth is None: + raise TypeError(f"cannot perform {name} with type {self.dtype}") + return meth(skipna=skipna, **kwargs) # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 # Incompatible types in assignment (expression has type "None", base class diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index e1347391b2bdd..0c50c36f8d1fc 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -790,6 +790,36 @@ def argsort( ascending=ascending, kind=kind, na_position=na_position, **kwargs ) + def min(self, *, axis: int | None = None, skipna: bool = True): + if axis is not None and axis >= self.ndim: + raise ValueError(axis) + if not len(self): + return self._na_value + + mask = self.isna() + if mask.any(): + if not skipna: + return self._na_value + return self[~mask].min() + + indexer = self.argsort()[0] + return self[indexer] + + def max(self, *, axis: int | None = None, skipna: bool = True): + if axis is not None and axis >= self.ndim: + raise ValueError(axis) + if not len(self): + return self._na_value + + mask = self.isna() + if mask.any(): + if not skipna: + return self._na_value + return self[~mask].max() + + indexer = self.argsort()[-1] + return self[indexer] + def fillna( self: IntervalArrayT, value=None, method=None, limit=None ) -> IntervalArrayT: diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 7d27b617c0e6e..9fe382553f2dc 100644 
--- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -103,6 +103,8 @@ def test_shift_datetime(self): class TestSetitem: def test_set_na(self, left_right_dtypes): left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) result = IntervalArray.from_arrays(left, right) if result.dtype.subtype.kind not in ["m", "M"]: @@ -161,6 +163,51 @@ def test_repr(): assert result == expected +class TestReductions: + def test_min_max(self, left_right_dtypes, index_or_series_or_array): + left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) + arr = IntervalArray.from_arrays(left, right) + + # The expected results below are only valid if monotonic + assert left.is_monotonic_increasing + assert Index(arr).is_monotonic_increasing + + MIN = arr[0] + MAX = arr[-1] + + indexer = np.arange(len(arr)) + np.random.shuffle(indexer) + arr = arr.take(indexer) + + arr_na = arr.insert(2, np.nan) + + arr = index_or_series_or_array(arr) + arr_na = index_or_series_or_array(arr_na) + + for skipna in [True, False]: + res = arr.min(skipna=skipna) + assert res == MIN + assert type(res) == type(MIN) + + res = arr.max(skipna=skipna) + assert res == MAX + assert type(res) == type(MAX) + + res = arr_na.min(skipna=False) + assert np.isnan(res) + res = arr_na.max(skipna=False) + assert np.isnan(res) + + res = arr_na.min(skipna=True) + assert res == MIN + assert type(res) == type(MIN) + res = arr_na.max(skipna=True) + assert res == MAX + assert type(res) == type(MAX) + + # ---------------------------------------------------------------------------- # Arrow interaction diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 24c0d619e2b1a..2308cb81dbfb0 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -18,7 +18,10 @@ from pandas.core.dtypes.dtypes import IntervalDtype -from pandas import 
Interval +from pandas import ( + Interval, + Series, +) from pandas.core.arrays import IntervalArray from pandas.tests.extension import base @@ -102,7 +105,18 @@ def test_view(self, data): class TestReduce(base.BaseNoReduceTests): - pass + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + ser = Series(data) + + if op_name in ["min", "max"]: + # IntervalArray *does* implement these + assert getattr(ser, op_name)(skipna=skipna) in data + assert getattr(data, op_name)(skipna=skipna) in data + return + + super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) class TestMethods(BaseInterval, base.BaseMethodsTests): @@ -110,9 +124,12 @@ class TestMethods(BaseInterval, base.BaseMethodsTests): def test_combine_add(self, data_repeated): pass - @pytest.mark.skip(reason="Not Applicable") + @pytest.mark.xfail( + reason="Raises with incorrect message bc it disallows *all* listlikes " + "instead of just wrong-length listlikes" + ) def test_fillna_length_mismatch(self, data_missing): - pass + super().test_fillna_length_mismatch(data_missing) class TestMissing(BaseInterval, base.BaseMissingTests): diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 11a03c364458e..d8f3985e2c559 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -277,14 +277,6 @@ def test_reduce(values, box, request): if values.dtype.kind in ["i", "f"]: # ATM Index casts to object, so we get python ints/floats same_type = False - elif isinstance(values, pd.IntervalIndex): - mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented") - request.node.add_marker(mark) - - elif box is pd.Series or box is pd.DataFrame: - if isinstance(values, pd.IntervalIndex): - mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented") - request.node.add_marker(mark) if values.dtype == "i8" and box is 
pd.array: # FIXME: pd.array casts to Int64 From 66f1f5eab5cebad1b0830833566559f5701c9443 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Dec 2021 18:14:23 -0800 Subject: [PATCH 2/4] GH ref --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/tests/arrays/interval/test_interval.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 34e882efbf428..d05687177515e 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -218,7 +218,7 @@ Other enhancements ``USFederalHolidayCalendar``. See also `Other API changes`_. - :meth:`.Rolling.var`, :meth:`.Expanding.var`, :meth:`.Rolling.std`, :meth:`.Expanding.std` now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`44461`) - :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`) -- Implemented :meth:`IntervalArray.min`, :meth:`IntervalArray.max`, as a result of which ``min`` and ``max`` now work for :class:`IntervalIndex`, :class:`Series` and :class:`DataFrame` with ``IntervalDtype`` (:issue:`??`) +- Implemented :meth:`IntervalArray.min`, :meth:`IntervalArray.max`, as a result of which ``min`` and ``max`` now work for :class:`IntervalIndex`, :class:`Series` and :class:`DataFrame` with ``IntervalDtype`` (:issue:`44746`) - diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 9fe382553f2dc..2b80d9e28fd04 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -165,6 +165,7 @@ def test_repr(): class TestReductions: def test_min_max(self, left_right_dtypes, index_or_series_or_array): + # GH#44746 left, right = left_right_dtypes left = left.copy(deep=True) right = right.copy(deep=True) From 7380e43ab5a11d97b369aaae159f87514f72df74 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 4 Dec 2021 18:25:51 -0800 Subject: [PATCH 3/4] Test for axis validation 
--- pandas/core/arrays/interval.py | 8 ++++---- pandas/tests/arrays/interval/test_interval.py | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 0c50c36f8d1fc..2eaa7d3c1fffa 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -791,8 +791,8 @@ def argsort( ) def min(self, *, axis: int | None = None, skipna: bool = True): - if axis is not None and axis >= self.ndim: - raise ValueError(axis) + nv.validate_minmax_axis(axis, self.ndim) + if not len(self): return self._na_value @@ -806,8 +806,8 @@ def min(self, *, axis: int | None = None, skipna: bool = True): return self[indexer] def max(self, *, axis: int | None = None, skipna: bool = True): - if axis is not None and axis >= self.ndim: - raise ValueError(axis) + nv.validate_minmax_axis(axis, self.ndim) + if not len(self): return self._na_value diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 2b80d9e28fd04..400846cc4ca1d 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -164,6 +164,25 @@ def test_repr(): class TestReductions: + def test_min_max_invalid_axis(self, left_right_dtypes): + left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) + arr = IntervalArray.from_arrays(left, right) + + msg = "`axis` must be fewer than the number of dimensions" + for axis in [-2, 1]: + with pytest.raises(ValueError, match=msg): + arr.min(axis=axis) + with pytest.raises(ValueError, match=msg): + arr.max(axis=axis) + + msg = "'>=' not supported between" + with pytest.raises(TypeError, match=msg): + arr.min(axis="foo") + with pytest.raises(TypeError, match=msg): + arr.max(axis="foo") + def test_min_max(self, left_right_dtypes, index_or_series_or_array): # GH#44746 left, right = left_right_dtypes From 
a1aa025f64ad93dfb84dc19c05b3c922569855be Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 4 Dec 2021 21:51:22 -0800 Subject: [PATCH 4/4] CI troubleshoot --- pandas/_testing/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index a89946d1f8cc8..6248154422252 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -1077,14 +1077,16 @@ def shares_memory(left, right) -> bool: if isinstance(left, NDArrayBackedExtensionArray): return shares_memory(left._ndarray, right) - if isinstance(left, pd.SparseArray): + if isinstance(left, pd.core.arrays.SparseArray): return shares_memory(left.sp_values, right) if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]": # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]": - left_pa_data = left._data - right_pa_data = right._data + # error: "ExtensionArray" has no attribute "_data" + left_pa_data = left._data # type: ignore[attr-defined] + # error: "ExtensionArray" has no attribute "_data" + right_pa_data = right._data # type: ignore[attr-defined] left_buf1 = left_pa_data.chunk(0).buffers()[1] right_buf1 = right_pa_data.chunk(0).buffers()[1] return left_buf1 == right_buf1