Skip to content

Commit 0d1e383

Browse files
committed
CLN: Mode result needs to be sorted, otherwise hit weirdness with hashing
1 parent 7841d9a commit 0d1e383

File tree

4 files changed

+42
-14
lines changed

4 files changed

+42
-14
lines changed

pandas/core/algorithms.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
intended for public consumption
44
"""
55

6+
from warnings import warn
67
import numpy as np
78

89
import pandas.core.common as com
@@ -222,6 +223,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False, bins=None)
222223

223224

224225
def mode(values):
226+
"Returns the mode or mode(s) of the passed Series or ndarray (sorted)"
227+
# must sort because hash order isn't necessarily defined.
225228
from pandas.core.series import Series
226229

227230
if isinstance(values, Series):
@@ -234,17 +237,22 @@ def mode(values):
234237
dtype = values.dtype
235238
if com.is_integer_dtype(values.dtype):
236239
values = com._ensure_int64(values)
237-
result = constructor(htable.mode_int64(values), dtype=dtype)
240+
result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)
238241

239242
elif issubclass(values.dtype.type, (np.datetime64,np.timedelta64)):
240243
dtype = values.dtype
241244
values = values.view(np.int64)
242-
result = constructor(htable.mode_int64(values), dtype=dtype)
245+
result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)
243246

244247
else:
245248
mask = com.isnull(values)
246249
values = com._ensure_object(values)
247-
result = constructor(htable.mode_object(values, mask), dtype=dtype)
250+
res = htable.mode_object(values, mask)
251+
try:
252+
res = sorted(res)
253+
except TypeError as e:
254+
warn("Unable to sort modes: %s" % e)
255+
result = constructor(res, dtype=dtype)
248256

249257
return result
250258

pandas/core/frame.py

+4
Original file line numberDiff line numberDiff line change
@@ -4096,6 +4096,10 @@ def mode(self, axis=0, numeric_only=False):
40964096
1/'columns' : get mode of each row
40974097
numeric_only : bool, default False
40984098
if True, only apply to numeric columns
4099+
4100+
Returns
4101+
-------
4102+
modes : DataFrame (sorted)
40994103
"""
41004104
data = self if not numeric_only else self._get_numeric_data()
41014105
f = lambda s: s.mode()

pandas/core/series.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1133,9 +1133,15 @@ def mode(self):
11331133
Empty if nothing occurs at least 2 times. Always returns a Series, even
11341134
if only one value.
11351135
1136+
Parameters
1137+
----------
1138+
sort : bool, default True
1139+
if True, sort values lexicographically; if False, skip
1140+
sorting. Result ordering when ``sort=False`` is not defined.
1141+
11361142
Returns
11371143
-------
1138-
modes : Series
1144+
modes : Series (sorted)
11391145
"""
11401146
# TODO: Add option for bins like value_counts()
11411147
from pandas.core.algorithms import mode

pandas/tests/test_frame.py

+20-10
Original file line numberDiff line numberDiff line change
@@ -10052,28 +10052,38 @@ def wrapper(x):
1005210052

1005310053
def test_mode(self):
1005410054
df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11],
10055-
"B": [10, 10, 10, 5, 3, 4],
10055+
"B": [10, 10, 10, np.nan, 3, 4],
1005610056
"C": [8, 8, 8, 9, 9, 9],
10057-
"D": range(6)})
10057+
"D": range(6),
10058+
"E": [8, 8, 1, 1, 3, 3]})
1005810059
assert_frame_equal(df[["A"]].mode(),
1005910060
pd.DataFrame({"A": [12]}))
1006010061
assert_frame_equal(df[["D"]].mode(),
1006110062
pd.DataFrame(pd.Series([], dtype="int64"),
1006210063
columns=["D"]))
10064+
assert_frame_equal(df[["E"]].mode(),
10065+
pd.DataFrame(pd.Series([1, 3, 8], dtype="int64"),
10066+
columns=["E"]))
1006310067
assert_frame_equal(df[["A", "B"]].mode(),
10064-
pd.DataFrame({"A": [12], "B": [10]}))
10068+
pd.DataFrame({"A": [12], "B": [10.]}))
1006510069
assert_frame_equal(df.mode(),
10066-
pd.DataFrame({"A": [12, np.nan],
10067-
"B": [10, np.nan],
10068-
"C": [8, 9],
10069-
"D": [np.nan, np.nan]}))
10070+
pd.DataFrame({"A": [12, np.nan, np.nan],
10071+
"B": [10, np.nan, np.nan],
10072+
"C": [8, 9, np.nan],
10073+
"D": [np.nan, np.nan, np.nan],
10074+
"E": [1, 3, 8]}))
1007010075

10071-
# should preserve order
10076+
# outputs in sorted order
1007210077
df["C"] = list(reversed(df["C"]))
10073-
assert_frame_equal(df[["A", "B", "C"]].mode(),
10078+
print(df["C"])
10079+
print(df["C"].mode())
10080+
a, b = (df[["A", "B", "C"]].mode(),
1007410081
pd.DataFrame({"A": [12, np.nan],
1007510082
"B": [10, np.nan],
10076-
"C": [9, 8]}))
10083+
"C": [8, 9]}))
10084+
print(a)
10085+
print(b)
10086+
assert_frame_equal(a, b)
1007710087
# should work with heterogeneous types
1007810088
df = pd.DataFrame({"A": range(6),
1007910089
"B": pd.date_range('2011', periods=6),

0 commit comments

Comments
 (0)