From a1bdb34fa320f6065a670d25a869a1ea917a6570 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 29 Dec 2018 08:36:34 -0800 Subject: [PATCH 1/3] reductions from 24024 --- pandas/core/arrays/datetimelike.py | 66 ++++++++++++++++++++++++ pandas/tests/arrays/test_datetimelike.py | 7 +++ pandas/tests/arrays/test_datetimes.py | 38 ++++++++++++++ pandas/tests/arrays/test_period.py | 39 ++++++++++++++ pandas/tests/arrays/test_timedeltas.py | 31 +++++++++++ 5 files changed, 181 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a6f603d16affe..284695504170b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -29,6 +29,7 @@ from pandas.core.algorithms import checked_add_with_arr, take, unique1d import pandas.core.common as com +from pandas.core import nanops from pandas.tseries import frequencies from pandas.tseries.offsets import DateOffset, Tick @@ -1381,6 +1382,71 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', ) return arg + # -------------------------------------------------------------- + # Reductions + + def _reduce(self, name, axis=0, skipna=True, **kwargs): + op = getattr(self, name, None) + if op: + return op(axis=axis, skipna=skipna, **kwargs) + else: + raise TypeError("cannot perform {name} with type {dtype}" + .format(name=name, dtype=self.dtype)) + # TODO: use super(DatetimeLikeArrayMixin, self)._reduce + # after we subclass ExtensionArray + + def min(self, axis=None, skipna=True, *args, **kwargs): + """ + Return the minimum value of the Array or minimum along + an axis. + + See Also + -------- + numpy.ndarray.min + Index.min : Return the minimum value in an Index. + Series.min : Return the minimum value in a Series. 
+ """ + nv.validate_min(args, kwargs) + nv.validate_minmax_axis(axis) + + result = nanops.nanmin(self.asi8, skipna=skipna, mask=self.isna()) + if isna(result): + # Period._from_ordinal does not handle np.nan gracefully + return NaT + return self._box_func(result) + + def max(self, axis=None, skipna=True, *args, **kwargs): + """ + Return the maximum value of the Array or maximum along + an axis. + + See Also + -------- + numpy.ndarray.max + Index.max : Return the maximum value in an Index. + Series.max : Return the maximum value in a Series. + """ + # TODO: skipna is broken with max. + # See https://github.com/pandas-dev/pandas/issues/24265 + nv.validate_max(args, kwargs) + nv.validate_minmax_axis(axis) + + mask = self.isna() + if skipna: + values = self[~mask].asi8 + elif mask.any(): + return NaT + else: + values = self.asi8 + + if not len(values): + # short-circuit for empty max / min + return NaT + + result = nanops.nanmax(values, skipna=skipna) + # Don't have to worry about NA `result`, since no NA went in. 
+ return self._box_func(result) + DatetimeLikeArrayMixin._add_comparison_ops() diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 89369c19cf05d..54562935e8072 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -157,6 +157,13 @@ def test_scalar_from_string(self): result = arr._scalar_from_string(str(arr[0])) assert result == arr[0] + def test_reduce_invalid(self): + data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq='D') + + with pytest.raises(TypeError, match='cannot perform'): + arr._reduce("not a method") + def test_searchsorted(self): data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9 arr = self.array_cls(data, freq='D') diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 80c87665236d3..7df6ea28b255c 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -90,3 +90,41 @@ def test_setitem_clears_freq(self): tz='US/Central')) a[0] = pd.Timestamp("2000", tz="US/Central") assert a.freq is None + + +class TestReductions(object): + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_min_max(self, tz): + arr = DatetimeArray._from_sequence([ + '2000-01-03', + '2000-01-03', + 'NaT', + '2000-01-02', + '2000-01-05', + '2000-01-04', + ], tz=tz) + + result = arr.min() + expected = pd.Timestamp('2000-01-02', tz=tz) + assert result == expected + + result = arr.max() + expected = pd.Timestamp('2000-01-05', tz=tz) + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + @pytest.mark.parametrize('skipna', [True, False]) + def test_min_max_empty(self, skipna, tz): + arr = DatetimeArray._from_sequence([], tz=tz) + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = 
arr.max(skipna=skipna) + assert result is pd.NaT diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 82025cd972e6b..387eaa5223bbe 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -261,3 +261,42 @@ def test_repr_large(): "Length: 1000, dtype: period[D]" ) assert result == expected + + +# ---------------------------------------------------------------------------- +# Reductions + +class TestReductions(object): + + def test_min_max(self): + arr = period_array([ + '2000-01-03', + '2000-01-03', + 'NaT', + '2000-01-02', + '2000-01-05', + '2000-01-04', + ], freq='D') + + result = arr.min() + expected = pd.Period('2000-01-02', freq='D') + assert result == expected + + result = arr.max() + expected = pd.Period('2000-01-05', freq='D') + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize('skipna', [True, False]) + def test_min_max_empty(self, skipna): + arr = period_array([], freq='D') + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = arr.max(skipna=skipna) + assert result is pd.NaT diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 3264550404642..ea0138f9f0df1 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -77,3 +77,34 @@ def test_setitem_clears_freq(self): a = TimedeltaArray(pd.timedelta_range('1H', periods=2, freq='H')) a[0] = pd.Timedelta("1H") assert a.freq is None + + +class TestReductions(object): + + def test_min_max(self): + arr = TimedeltaArray._from_sequence([ + '3H', '3H', 'NaT', '2H', '5H', '4H', + ]) + + result = arr.min() + expected = pd.Timedelta('2H') + assert result == expected + + result = arr.max() + expected = pd.Timedelta('5H') + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = 
arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize('skipna', [True, False]) + def test_min_max_empty(self, skipna): + arr = TimedeltaArray._from_sequence([]) + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = arr.max(skipna=skipna) + assert result is pd.NaT From 244d37bb35165937c114559342fcf029a1026d6b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 29 Dec 2018 10:46:57 -0800 Subject: [PATCH 2/3] fix _reduce --- pandas/core/indexes/datetimes.py | 5 +++++ pandas/core/indexes/timedeltas.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1e6daabcc0445..dd39b292872f8 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1141,6 +1141,11 @@ def _eadata(self): _is_unique = Index.is_unique astype = DatetimeIndexOpsMixin.astype + # Override DatetimeArray methods + max = DatetimeIndexOpsMixin.max + min = DatetimeIndexOpsMixin.min + _reduce = Index._reduce + _timezone = cache_readonly(DatetimeArray._timezone.fget) is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) _resolution = cache_readonly(DatetimeArray._resolution.fget) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index aa0e1edf06af0..e989d956b9f1a 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -304,6 +304,11 @@ def _eadata(self): _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique + # Override DatetimeArray methods + max = DatetimeIndexOpsMixin.max + min = DatetimeIndexOpsMixin.min + _reduce = Index._reduce + # ------------------------------------------------------------------- @Appender(_index_shared_docs['astype']) From 56bcc8099b5a62efd8346781416a6cdd0bda9fb3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 29 Dec 2018 11:24:34 -0800 Subject: [PATCH 3/3] isort fixup --- pandas/core/arrays/datetimelike.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 284695504170b..2273e669f36b4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -27,9 +27,9 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna +from pandas.core import nanops from pandas.core.algorithms import checked_add_with_arr, take, unique1d import pandas.core.common as com -from pandas.core import nanops from pandas.tseries import frequencies from pandas.tseries.offsets import DateOffset, Tick