Skip to content

Commit 680e372

Browse files
authored
REF: Implement EA._mode, de-special-case categorical/dtlike (#45033)
1 parent 7b7b025 commit 680e372

File tree

8 files changed

+122
-69
lines changed

8 files changed

+122
-69
lines changed

pandas/core/algorithms.py

+8-19
Original file line numberDiff line numberDiff line change
@@ -935,7 +935,7 @@ def duplicated(
935935
return htable.duplicated(values, keep=keep)
936936

937937

938-
def mode(values, dropna: bool = True) -> Series:
938+
def mode(values: ArrayLike, dropna: bool = True) -> ArrayLike:
939939
"""
940940
Returns the mode(s) of an array.
941941
@@ -948,27 +948,17 @@ def mode(values, dropna: bool = True) -> Series:
948948
949949
Returns
950950
-------
951-
mode : Series
951+
np.ndarray or ExtensionArray
952952
"""
953-
from pandas import Series
954-
from pandas.core.indexes.api import default_index
955-
956953
values = _ensure_arraylike(values)
957954
original = values
958955

959-
# categorical is a fast-path
960-
if is_categorical_dtype(values.dtype):
961-
if isinstance(values, Series):
962-
# TODO: should we be passing `name` below?
963-
return Series(values._values.mode(dropna=dropna), name=values.name)
964-
return values.mode(dropna=dropna)
965-
966956
if needs_i8_conversion(values.dtype):
967-
if dropna:
968-
mask = values.isna()
969-
values = values[~mask]
970-
modes = mode(values.view("i8"))
971-
return modes.view(original.dtype)
957+
# Got here with ndarray; dispatch to DatetimeArray/TimedeltaArray.
958+
values = ensure_wrapped_if_datetimelike(values)
959+
# error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
960+
# ndarray[Any, Any]]" has no attribute "_mode"
961+
return values._mode(dropna=dropna) # type: ignore[union-attr]
972962

973963
values = _ensure_data(values)
974964

@@ -979,8 +969,7 @@ def mode(values, dropna: bool = True) -> Series:
979969
warn(f"Unable to sort modes: {err}")
980970

981971
result = _reconstruct_data(npresult, original.dtype, original)
982-
# Ensure index is type stable (should always use int index)
983-
return Series(result, index=default_index(len(result)))
972+
return result
984973

985974

986975
def rank(

pandas/core/arrays/base.py

+21
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
from pandas.core.algorithms import (
7474
factorize_array,
7575
isin,
76+
mode,
7677
rank,
7778
unique,
7879
)
@@ -1578,6 +1579,26 @@ def _quantile(
15781579

15791580
return result
15801581

1582+
def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT:
1583+
"""
1584+
Returns the mode(s) of the ExtensionArray.
1585+
1586+
Always returns an `ExtensionArray`, even if there is only one value.
1587+
1588+
Parameters
1589+
----------
1590+
dropna : bool, default True
1591+
Don't consider counts of NA values.
1592+
1593+
Returns
1594+
-------
1595+
same type as self
1596+
Sorted, if possible.
1597+
"""
1598+
# error: Incompatible return value type (got "Union[ExtensionArray,
1599+
# ndarray[Any, Any]]", expected "ExtensionArrayT")
1600+
return mode(self, dropna=dropna) # type: ignore[return-value]
1601+
15811602
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
15821603
if any(
15831604
isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs

pandas/core/arrays/categorical.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -2229,7 +2229,7 @@ def max(self, *, skipna=True, **kwargs):
22292229
pointer = self._codes.max()
22302230
return self._wrap_reduction_result(None, pointer)
22312231

2232-
def mode(self, dropna=True):
2232+
def mode(self, dropna: bool = True) -> Categorical:
22332233
"""
22342234
Returns the mode(s) of the Categorical.
22352235
@@ -2244,6 +2244,15 @@ def mode(self, dropna=True):
22442244
-------
22452245
modes : `Categorical` (sorted)
22462246
"""
2247+
warn(
2248+
"Categorical.mode is deprecated and will be removed in a future version. "
2249+
"Use Series.mode instead.",
2250+
FutureWarning,
2251+
stacklevel=find_stack_level(),
2252+
)
2253+
return self._mode(dropna=dropna)
2254+
2255+
def _mode(self, dropna: bool = True) -> Categorical:
22472256
codes = self._codes
22482257
if dropna:
22492258
good = self._codes != -1

pandas/core/arrays/datetimelike.py

+12
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
from pandas.core.algorithms import (
100100
checked_add_with_arr,
101101
isin,
102+
mode,
102103
unique1d,
103104
)
104105
from pandas.core.arraylike import OpsMixin
@@ -1531,6 +1532,17 @@ def median(self, *, axis: int | None = None, skipna: bool = True, **kwargs):
15311532
result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
15321533
return self._wrap_reduction_result(axis, result)
15331534

1535+
def _mode(self, dropna: bool = True):
1536+
values = self
1537+
if dropna:
1538+
mask = values.isna()
1539+
values = values[~mask]
1540+
1541+
i8modes = mode(values.view("i8"))
1542+
npmodes = i8modes.view(self._ndarray.dtype)
1543+
npmodes = cast(np.ndarray, npmodes)
1544+
return self._from_backing_data(npmodes)
1545+
15341546

15351547
class DatelikeOps(DatetimeLikeArrayMixin):
15361548
"""

pandas/core/series.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -1999,7 +1999,16 @@ def mode(self, dropna: bool = True) -> Series:
19991999
Modes of the Series in sorted order.
20002000
"""
20012001
# TODO: Add option for bins like value_counts()
2002-
return algorithms.mode(self, dropna=dropna)
2002+
values = self._values
2003+
if isinstance(values, np.ndarray):
2004+
res_values = algorithms.mode(values, dropna=dropna)
2005+
else:
2006+
res_values = values._mode(dropna=dropna)
2007+
2008+
# Ensure index is type stable (should always use int index)
2009+
return self._constructor(
2010+
res_values, index=range(len(res_values)), name=self.name
2011+
)
20032012

20042013
def unique(self) -> ArrayLike:
20052014
"""

pandas/tests/arrays/categorical/test_analytics.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,9 @@ def test_numpy_min_max_axis_equals_none(self, method, expected):
147147
)
148148
def test_mode(self, values, categories, exp_mode):
149149
s = Categorical(values, categories=categories, ordered=True)
150-
res = s.mode()
150+
msg = "Use Series.mode instead"
151+
with tm.assert_produces_warning(FutureWarning, match=msg):
152+
res = s.mode()
151153
exp = Categorical(exp_mode, categories=categories, ordered=True)
152154
tm.assert_categorical_equal(res, exp)
153155

pandas/tests/series/test_reductions.py

-7
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
Series,
88
)
99
import pandas._testing as tm
10-
from pandas.core.algorithms import mode
1110

1211

1312
@pytest.mark.parametrize("as_period", [True, False])
@@ -24,12 +23,6 @@ def test_mode_extension_dtype(as_period):
2423
assert res.dtype == ser.dtype
2524
tm.assert_series_equal(res, ser)
2625

27-
res = mode(ser._values)
28-
tm.assert_series_equal(res, ser)
29-
30-
res = mode(pd.Index(ser))
31-
tm.assert_series_equal(res, ser)
32-
3326

3427
def test_reductions_td64_with_nat():
3528
# GH#8617

pandas/tests/test_algos.py

+58-40
Original file line numberDiff line numberDiff line change
@@ -2261,7 +2261,7 @@ def test_int64_add_overflow():
22612261
class TestMode:
22622262
def test_no_mode(self):
22632263
exp = Series([], dtype=np.float64, index=Index([], dtype=int))
2264-
tm.assert_series_equal(algos.mode([]), exp)
2264+
tm.assert_numpy_array_equal(algos.mode([]), exp.values)
22652265

22662266
@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
22672267
def test_mode_single(self, dt):
@@ -2272,20 +2272,22 @@ def test_mode_single(self, dt):
22722272
exp_multi = [1]
22732273
data_multi = [1, 1]
22742274

2275-
s = Series(data_single, dtype=dt)
2275+
ser = Series(data_single, dtype=dt)
22762276
exp = Series(exp_single, dtype=dt)
2277-
tm.assert_series_equal(algos.mode(s), exp)
2277+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2278+
tm.assert_series_equal(ser.mode(), exp)
22782279

2279-
s = Series(data_multi, dtype=dt)
2280+
ser = Series(data_multi, dtype=dt)
22802281
exp = Series(exp_multi, dtype=dt)
2281-
tm.assert_series_equal(algos.mode(s), exp)
2282+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2283+
tm.assert_series_equal(ser.mode(), exp)
22822284

22832285
def test_mode_obj_int(self):
22842286
exp = Series([1], dtype=int)
2285-
tm.assert_series_equal(algos.mode([1]), exp)
2287+
tm.assert_numpy_array_equal(algos.mode([1]), exp.values)
22862288

22872289
exp = Series(["a", "b", "c"], dtype=object)
2288-
tm.assert_series_equal(algos.mode(["a", "b", "c"]), exp)
2290+
tm.assert_numpy_array_equal(algos.mode(["a", "b", "c"]), exp.values)
22892291

22902292
@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
22912293
def test_number_mode(self, dt):
@@ -2295,104 +2297,120 @@ def test_number_mode(self, dt):
22952297
exp_multi = [1, 3]
22962298
data_multi = [1] * 5 + [2] * 3 + [3] * 5
22972299

2298-
s = Series(data_single, dtype=dt)
2300+
ser = Series(data_single, dtype=dt)
22992301
exp = Series(exp_single, dtype=dt)
2300-
tm.assert_series_equal(algos.mode(s), exp)
2302+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2303+
tm.assert_series_equal(ser.mode(), exp)
23012304

2302-
s = Series(data_multi, dtype=dt)
2305+
ser = Series(data_multi, dtype=dt)
23032306
exp = Series(exp_multi, dtype=dt)
2304-
tm.assert_series_equal(algos.mode(s), exp)
2307+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2308+
tm.assert_series_equal(ser.mode(), exp)
23052309

23062310
def test_strobj_mode(self):
23072311
exp = ["b"]
23082312
data = ["a"] * 2 + ["b"] * 3
23092313

2310-
s = Series(data, dtype="c")
2314+
ser = Series(data, dtype="c")
23112315
exp = Series(exp, dtype="c")
2312-
tm.assert_series_equal(algos.mode(s), exp)
2316+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2317+
tm.assert_series_equal(ser.mode(), exp)
23132318

23142319
@pytest.mark.parametrize("dt", [str, object])
23152320
def test_strobj_multi_char(self, dt):
23162321
exp = ["bar"]
23172322
data = ["foo"] * 2 + ["bar"] * 3
23182323

2319-
s = Series(data, dtype=dt)
2324+
ser = Series(data, dtype=dt)
23202325
exp = Series(exp, dtype=dt)
2321-
tm.assert_series_equal(algos.mode(s), exp)
2326+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2327+
tm.assert_series_equal(ser.mode(), exp)
23222328

23232329
def test_datelike_mode(self):
23242330
exp = Series(["1900-05-03", "2011-01-03", "2013-01-02"], dtype="M8[ns]")
2325-
s = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]")
2326-
tm.assert_series_equal(algos.mode(s), exp)
2331+
ser = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]")
2332+
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
2333+
tm.assert_series_equal(ser.mode(), exp)
23272334

23282335
exp = Series(["2011-01-03", "2013-01-02"], dtype="M8[ns]")
2329-
s = Series(
2336+
ser = Series(
23302337
["2011-01-03", "2013-01-02", "1900-05-03", "2011-01-03", "2013-01-02"],
23312338
dtype="M8[ns]",
23322339
)
2333-
tm.assert_series_equal(algos.mode(s), exp)
2340+
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
2341+
tm.assert_series_equal(ser.mode(), exp)
23342342

23352343
def test_timedelta_mode(self):
23362344
exp = Series(["-1 days", "0 days", "1 days"], dtype="timedelta64[ns]")
2337-
s = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]")
2338-
tm.assert_series_equal(algos.mode(s), exp)
2345+
ser = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]")
2346+
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
2347+
tm.assert_series_equal(ser.mode(), exp)
23392348

23402349
exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]")
2341-
s = Series(
2350+
ser = Series(
23422351
["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
23432352
dtype="timedelta64[ns]",
23442353
)
2345-
tm.assert_series_equal(algos.mode(s), exp)
2354+
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
2355+
tm.assert_series_equal(ser.mode(), exp)
23462356

23472357
def test_mixed_dtype(self):
23482358
exp = Series(["foo"])
2349-
s = Series([1, "foo", "foo"])
2350-
tm.assert_series_equal(algos.mode(s), exp)
2359+
ser = Series([1, "foo", "foo"])
2360+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2361+
tm.assert_series_equal(ser.mode(), exp)
23512362

23522363
def test_uint64_overflow(self):
23532364
exp = Series([2 ** 63], dtype=np.uint64)
2354-
s = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64)
2355-
tm.assert_series_equal(algos.mode(s), exp)
2365+
ser = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64)
2366+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2367+
tm.assert_series_equal(ser.mode(), exp)
23562368

23572369
exp = Series([1, 2 ** 63], dtype=np.uint64)
2358-
s = Series([1, 2 ** 63], dtype=np.uint64)
2359-
tm.assert_series_equal(algos.mode(s), exp)
2370+
ser = Series([1, 2 ** 63], dtype=np.uint64)
2371+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
2372+
tm.assert_series_equal(ser.mode(), exp)
23602373

23612374
def test_categorical(self):
23622375
c = Categorical([1, 2])
23632376
exp = c
2364-
tm.assert_categorical_equal(algos.mode(c), exp)
2365-
tm.assert_categorical_equal(c.mode(), exp)
2377+
msg = "Categorical.mode is deprecated"
2378+
with tm.assert_produces_warning(FutureWarning, match=msg):
2379+
res = c.mode()
2380+
tm.assert_categorical_equal(res, exp)
23662381

23672382
c = Categorical([1, "a", "a"])
23682383
exp = Categorical(["a"], categories=[1, "a"])
2369-
tm.assert_categorical_equal(algos.mode(c), exp)
2370-
tm.assert_categorical_equal(c.mode(), exp)
2384+
with tm.assert_produces_warning(FutureWarning, match=msg):
2385+
res = c.mode()
2386+
tm.assert_categorical_equal(res, exp)
23712387

23722388
c = Categorical([1, 1, 2, 3, 3])
23732389
exp = Categorical([1, 3], categories=[1, 2, 3])
2374-
tm.assert_categorical_equal(algos.mode(c), exp)
2375-
tm.assert_categorical_equal(c.mode(), exp)
2390+
with tm.assert_produces_warning(FutureWarning, match=msg):
2391+
res = c.mode()
2392+
tm.assert_categorical_equal(res, exp)
23762393

23772394
def test_index(self):
23782395
idx = Index([1, 2, 3])
23792396
exp = Series([1, 2, 3], dtype=np.int64)
2380-
tm.assert_series_equal(algos.mode(idx), exp)
2397+
tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
23812398

23822399
idx = Index([1, "a", "a"])
23832400
exp = Series(["a"], dtype=object)
2384-
tm.assert_series_equal(algos.mode(idx), exp)
2401+
tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
23852402

23862403
idx = Index([1, 1, 2, 3, 3])
23872404
exp = Series([1, 3], dtype=np.int64)
2388-
tm.assert_series_equal(algos.mode(idx), exp)
2405+
tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
23892406

2390-
exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]")
23912407
idx = Index(
23922408
["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
23932409
dtype="timedelta64[ns]",
23942410
)
2395-
tm.assert_series_equal(algos.mode(idx), exp)
2411+
with pytest.raises(AttributeError, match="TimedeltaIndex"):
2412+
# algos.mode expects ArrayLike, does *not* unwrap TimedeltaIndex
2413+
algos.mode(idx)
23962414

23972415

23982416
class TestDiff:

0 commit comments

Comments
 (0)