Skip to content

BUG: Series.mode with dt64tz or PeriodDtype #44582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 26, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,7 @@ Datetimelike
- Bug in addition with a :class:`Tick` object and a ``np.timedelta64`` object incorrectly raising instead of returning :class:`Timedelta` (:issue:`44474`)
- Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`)
- Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`)
- Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`)
-

Timedelta
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def _reconstruct_data(
if isinstance(values, cls) and values.dtype == dtype:
return values

values = cls._from_sequence(values)
values = cls._from_sequence(values, dtype=dtype)
elif is_bool_dtype(dtype):
values = values.astype(dtype, copy=False)

Expand Down Expand Up @@ -960,15 +960,18 @@ def mode(values, dropna: bool = True) -> Series:
original = values

# categorical is a fast-path
if is_categorical_dtype(values):
if is_categorical_dtype(values.dtype):
if isinstance(values, Series):
# TODO: should we be passing `name` below?
return Series(values._values.mode(dropna=dropna), name=values.name)
return values.mode(dropna=dropna)

if dropna and needs_i8_conversion(values.dtype):
mask = values.isnull()
values = values[~mask]
if needs_i8_conversion(values.dtype):
if dropna:
mask = values.isna()
values = values[~mask]
modes = mode(values.view("i8"))
return modes.view(original.dtype)

values = _ensure_data(values)

Expand Down
48 changes: 46 additions & 2 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from pandas._libs import lib
from pandas._libs.arrays import NDArrayBacked
from pandas._typing import (
ArrayLike,
Dtype,
F,
PositionalIndexer2D,
PositionalIndexerTuple,
Expand All @@ -34,8 +36,15 @@
validate_insert_loc,
)

from pandas.core.dtypes.common import is_dtype_equal
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.common import (
is_dtype_equal,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
)
from pandas.core.dtypes.missing import array_equivalent

from pandas.core import missing
Expand Down Expand Up @@ -101,6 +110,41 @@ def _validate_scalar(self, value):

# ------------------------------------------------------------------------

def view(self, dtype: Dtype | None = None) -> ArrayLike:
# We handle datetime64, datetime64tz, timedelta64, and period
# dtypes here. Everything else we pass through to the underlying
# ndarray.
if dtype is None or dtype is self.dtype:
return self._from_backing_data(self._ndarray)

if isinstance(dtype, type):
# we sometimes pass non-dtype objects, e.g np.ndarray;
# pass those through to the underlying ndarray
return self._ndarray.view(dtype)

dtype = pandas_dtype(dtype)
arr = self._ndarray

if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)):
cls = dtype.construct_array_type()
return cls(arr.view("i8"), dtype=dtype)
elif dtype == "M8[ns]":
from pandas.core.arrays import DatetimeArray

return DatetimeArray(arr.view("i8"), dtype=dtype)
elif dtype == "m8[ns]":
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray(arr.view("i8"), dtype=dtype)

# error: Incompatible return value type (got "ndarray", expected
# "ExtensionArray")
# error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible
# type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None,
# type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
return arr.view(dtype=dtype) # type: ignore[return-value,arg-type]

def take(
self: NDArrayBackedExtensionArrayT,
indices: TakeIndexer,
Expand Down
37 changes: 2 additions & 35 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,7 @@
is_unsigned_integer_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
Expand Down Expand Up @@ -461,36 +457,7 @@ def view(self, dtype: Dtype | None = ...) -> ArrayLike:
...

def view(self, dtype: Dtype | None = None) -> ArrayLike:
# We handle datetime64, datetime64tz, timedelta64, and period
# dtypes here. Everything else we pass through to the underlying
# ndarray.
if dtype is None or dtype is self.dtype:
return type(self)(self._ndarray, dtype=self.dtype)

if isinstance(dtype, type):
# we sometimes pass non-dtype objects, e.g np.ndarray;
# pass those through to the underlying ndarray
return self._ndarray.view(dtype)

dtype = pandas_dtype(dtype)
if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)):
cls = dtype.construct_array_type()
return cls(self.asi8, dtype=dtype)
elif dtype == "M8[ns]":
from pandas.core.arrays import DatetimeArray

return DatetimeArray(self.asi8, dtype=dtype)
elif dtype == "m8[ns]":
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray(self.asi8, dtype=dtype)
# error: Incompatible return value type (got "ndarray", expected
# "ExtensionArray")
# error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible
# type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None,
# type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
return self._ndarray.view(dtype=dtype) # type: ignore[return-value,arg-type]
return super().view(dtype)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you remove this entirely (since it will just use the super view anyhow)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not with the @overloads just above this


# ------------------------------------------------------------------
# ExtensionArray Interface
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,7 +947,6 @@ def view(self, cls=None):
# e.g. m8[s]
return self._data.view(cls)

arr = self._data.view("i8")
idx_cls = self._dtype_to_subclass(dtype)
arr_cls = idx_cls._data_cls
arr = arr_cls(self._data.view("i8"), dtype=dtype)
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,9 +802,11 @@ def view(self, dtype: Dtype | None = None) -> Series:
4 2
dtype: int8
"""
return self._constructor(
self._values.view(dtype), index=self.index
).__finalize__(self, method="view")
# self.array instead of self._values so we piggyback on PandasArray
# implementation
res_values = self.array.view(dtype)
res_ser = self._constructor(res_values, index=self.index)
return res_ser.__finalize__(self, method="view")

# ----------------------------------------------------------------------
# NDArray Compat
Expand Down Expand Up @@ -1975,7 +1977,7 @@ def count(self, level=None):
self, method="count"
)

def mode(self, dropna=True) -> Series:
def mode(self, dropna: bool = True) -> Series:
"""
Return the mode(s) of the Series.

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/methods/test_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@


class TestView:
def test_view_i8_to_datetimelike(self):
dti = date_range("2000", periods=4, tz="US/Central")
ser = Series(dti.asi8)

result = ser.view(dti.dtype)
tm.assert_datetime_array_equal(result._values, dti._data._with_freq(None))

pi = dti.tz_localize(None).to_period("D")
ser = Series(pi.asi8)
result = ser.view(pi.dtype)
tm.assert_period_array_equal(result._values, pi._data)

def test_view_tz(self):
# GH#24024
ser = Series(date_range("2000", periods=4, tz="US/Central"))
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/series/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,27 @@
import pandas._testing as tm


@pytest.mark.parametrize("as_period", [True, False])
def test_mode_extension_dtype(as_period):
# GH#41927 preserve dt64tz dtype
ser = Series([pd.Timestamp(1979, 4, n) for n in range(1, 5)])

if as_period:
ser = ser.dt.to_period("D")
else:
ser = ser.dt.tz_localize("US/Central")

res = ser.mode()
assert res.dtype == ser.dtype
tm.assert_series_equal(res, ser)

res = pd.core.algorithms.mode(ser._values)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't we have a rule which prevents this? i.e. can you import at the top instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure about the rule. will move the import

tm.assert_series_equal(res, ser)

res = pd.core.algorithms.mode(pd.Index(ser))
tm.assert_series_equal(res, ser)


def test_reductions_td64_with_nat():
# GH#8617
ser = Series([0, pd.NaT], dtype="m8[ns]")
Expand Down