From de006513fa2b13ed96c8bc4846040a69b9dc6a5f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 Nov 2021 14:25:09 -0800 Subject: [PATCH 1/4] BUG: Series.view from int64 to datetimelike --- pandas/core/arrays/_mixins.py | 48 +++++++++++++++++++++++- pandas/core/arrays/datetimelike.py | 37 +----------------- pandas/core/indexes/base.py | 1 - pandas/core/series.py | 8 ++-- pandas/tests/series/methods/test_view.py | 12 ++++++ 5 files changed, 65 insertions(+), 41 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 674379f6d65f8..9d534a5a8d815 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -16,6 +16,8 @@ from pandas._libs import lib from pandas._libs.arrays import NDArrayBacked from pandas._typing import ( + ArrayLike, + Dtype, F, PositionalIndexer2D, PositionalIndexerTuple, @@ -34,8 +36,15 @@ validate_insert_loc, ) -from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.common import ( + is_dtype_equal, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, + PeriodDtype, +) from pandas.core.dtypes.missing import array_equivalent from pandas.core import missing @@ -101,6 +110,41 @@ def _validate_scalar(self, value): # ------------------------------------------------------------------------ + def view(self, dtype: Dtype | None = None) -> ArrayLike: + # We handle datetime64, datetime64tz, timedelta64, and period + # dtypes here. Everything else we pass through to the underlying + # ndarray. + if dtype is None or dtype is self.dtype: + return self._from_backing_data(self._ndarray) + + if isinstance(dtype, type): + # we sometimes pass non-dtype objects, e.g np.ndarray; + # pass those through to the underlying ndarray + return self._ndarray.view(dtype) + + dtype = pandas_dtype(dtype) + arr = self._ndarray + + if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)): + cls = dtype.construct_array_type() + return cls(arr.view("i8"), dtype=dtype) + elif dtype == "M8[ns]": + from pandas.core.arrays import DatetimeArray + + return DatetimeArray(arr.view("i8"), dtype=dtype) + elif dtype == "m8[ns]": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray(arr.view("i8"), dtype=dtype) + + # error: Incompatible return value type (got "ndarray", expected + # "ExtensionArray") + # error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible + # type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None, + # type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, + # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" + return arr.view(dtype=dtype) # type: ignore[return-value,arg-type] + def take( self: NDArrayBackedExtensionArrayT, indices: TakeIndexer, diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6f18db6caab7d..44f0ed9db551f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -85,11 +85,7 @@ is_unsigned_integer_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import ( - DatetimeTZDtype, - ExtensionDtype, - PeriodDtype, -) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.missing import ( is_valid_na_for_dtype, isna, @@ -461,36 +457,7 @@ def view(self, dtype: Dtype | None = ...) -> ArrayLike: ... def view(self, dtype: Dtype | None = None) -> ArrayLike: - # We handle datetime64, datetime64tz, timedelta64, and period - # dtypes here. Everything else we pass through to the underlying - # ndarray. - if dtype is None or dtype is self.dtype: - return type(self)(self._ndarray, dtype=self.dtype) - - if isinstance(dtype, type): - # we sometimes pass non-dtype objects, e.g np.ndarray; - # pass those through to the underlying ndarray - return self._ndarray.view(dtype) - - dtype = pandas_dtype(dtype) - if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)): - cls = dtype.construct_array_type() - return cls(self.asi8, dtype=dtype) - elif dtype == "M8[ns]": - from pandas.core.arrays import DatetimeArray - - return DatetimeArray(self.asi8, dtype=dtype) - elif dtype == "m8[ns]": - from pandas.core.arrays import TimedeltaArray - - return TimedeltaArray(self.asi8, dtype=dtype) - # error: Incompatible return value type (got "ndarray", expected - # "ExtensionArray") - # error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible - # type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None, - # type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, - # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" - return self._ndarray.view(dtype=dtype) # type: ignore[return-value,arg-type] + return super().view(dtype) # ------------------------------------------------------------------ # ExtensionArray Interface diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 220b43f323a5f..bc591d49add5e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -947,7 +947,6 @@ def view(self, cls=None): # e.g. m8[s] return self._data.view(cls) - arr = self._data.view("i8") idx_cls = self._dtype_to_subclass(dtype) arr_cls = idx_cls._data_cls arr = arr_cls(self._data.view("i8"), dtype=dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index e0a63b8e35105..3cf4fe6767c3a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -802,9 +802,11 @@ def view(self, dtype: Dtype | None = None) -> Series: 4 2 dtype: int8 """ - return self._constructor( - self._values.view(dtype), index=self.index - ).__finalize__(self, method="view") + # self.array instead of self._values so we piggyback on PandasArray + # implementation + res_values = self.array.view(dtype) + res_ser = self._constructor(res_values, index=self.index) + return res_ser.__finalize__(self, method="view") # ---------------------------------------------------------------------- # NDArray Compat diff --git a/pandas/tests/series/methods/test_view.py b/pandas/tests/series/methods/test_view.py index 818023c01e4e7..22902c8648fc5 100644 --- a/pandas/tests/series/methods/test_view.py +++ b/pandas/tests/series/methods/test_view.py @@ -11,6 +11,18 @@ class TestView: + def test_view_i8_to_datetimelike(self): + dti = date_range("2000", periods=4, tz="US/Central") + ser = Series(dti.asi8) + + result = ser.view(dti.dtype) + tm.assert_datetime_array_equal(result._values, dti._data._with_freq(None)) + + pi = dti.tz_localize(None).to_period("D") + ser = Series(pi.asi8) + result = ser.view(pi.dtype) + tm.assert_period_array_equal(result._values, pi._data) + def test_view_tz(self): # GH#24024 ser = Series(date_range("2000", periods=4, tz="US/Central")) From ead1096cff4ae0c6780b5b014865e76d7728eb73 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 Nov 2021 15:12:47 -0800 Subject: [PATCH 2/4] BUG: Series.mode with dt64tz or PeriodDtype --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/algorithms.py | 13 ++++++++----- pandas/core/series.py | 2 +- pandas/tests/series/test_reductions.py | 21 +++++++++++++++++++++ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1f656f267783f..31d07b82e1c5b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -547,6 +547,7 @@ Datetimelike - Bug in addition with a :class:`Tick` object and a ``np.timedelta64`` object incorrectly raising instead of returning :class:`Timedelta` (:issue:`44474`) - Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`) - Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc addine ``None`` and replacing existing value (:issue:`44509`) +- Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`) - Timedelta diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 763e76f8497fa..538d9b0348d5f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -215,7 +215,7 @@ def _reconstruct_data( if isinstance(values, cls) and values.dtype == dtype: return values - values = cls._from_sequence(values) + values = cls._from_sequence(values, dtype=dtype) elif is_bool_dtype(dtype): values = values.astype(dtype, copy=False) @@ -960,15 +960,18 @@ def mode(values, dropna: bool = True) -> Series: original = values # categorical is a fast-path - if is_categorical_dtype(values): + if is_categorical_dtype(values.dtype): if isinstance(values, Series): # TODO: should we be passing `name` below? return Series(values._values.mode(dropna=dropna), name=values.name) return values.mode(dropna=dropna) - if dropna and needs_i8_conversion(values.dtype): - mask = values.isnull() - values = values[~mask] + if needs_i8_conversion(values.dtype): + if dropna: + mask = values.isna() + values = values[~mask] + modes = mode(values.view("i8")) + return modes.view(original.dtype) values = _ensure_data(values) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3cf4fe6767c3a..2b1ab3bad54b2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1977,7 +1977,7 @@ def count(self, level=None): self, method="count" ) - def mode(self, dropna=True) -> Series: + def mode(self, dropna: bool = True) -> Series: """ Return the mode(s) of the Series. diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index ca30e8f1ee6fd..5fa844c521e90 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -9,6 +9,27 @@ import pandas._testing as tm +@pytest.mark.parametrize("as_period", [True, False]) +def test_mode_extension_dtype(as_period): + # GH#41927 preserve dt64tz dtype + ser = Series([pd.Timestamp(1979, 4, n) for n in range(1, 5)]) + + if as_period: + ser = ser.dt.to_period("D") + else: + ser = ser.dt.tz_localize("US/Central") + + res = ser.mode() + assert res.dtype == ser.dtype + tm.assert_series_equal(res, ser) + + res = pd.core.algorithms.mode(ser._values) + tm.assert_series_equal(res, ser) + + res = pd.core.algorithms.mode(pd.Index(ser)) + tm.assert_series_equal(res, ser) + + def test_reductions_td64_with_nat(): # GH#8617 ser = Series([0, pd.NaT], dtype="m8[ns]") From 337728764ba8c1926486cf939e951ccb2694ceed Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Nov 2021 10:07:23 -0800 Subject: [PATCH 3/4] comment --- pandas/core/arrays/datetimelike.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 44f0ed9db551f..c92f6bb35d459 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -457,6 +457,8 @@ def view(self, dtype: Dtype | None = ...) -> ArrayLike: ... def view(self, dtype: Dtype | None = None) -> ArrayLike: + # we need to explicitly call super() method as long as the `@overload`s + # are present in this file. return super().view(dtype) # ------------------------------------------------------------------ From 9f295069d93307ee20dcc6821b369a8f02feaa77 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Nov 2021 12:31:58 -0800 Subject: [PATCH 4/4] update import --- pandas/tests/series/test_reductions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 5fa844c521e90..c5b0428131973 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -7,6 +7,7 @@ Series, ) import pandas._testing as tm +from pandas.core.algorithms import mode @pytest.mark.parametrize("as_period", [True, False]) @@ -23,10 +24,10 @@ def test_mode_extension_dtype(as_period): assert res.dtype == ser.dtype tm.assert_series_equal(res, ser) - res = pd.core.algorithms.mode(ser._values) + res = mode(ser._values) tm.assert_series_equal(res, ser) - res = pd.core.algorithms.mode(pd.Index(ser)) + res = mode(pd.Index(ser)) tm.assert_series_equal(res, ser)