Skip to content

Return mode even if single value (#15714) #15744

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def mode_{{dtype}}(ndarray[{{ctype}}] values):
def mode_{{dtype}}({{ctype}}[:] values):
{{endif}}
cdef:
int count, max_count = 2
int count, max_count = 1
int j = -1 # so you can do +=
Py_ssize_t k
kh_{{table_type}}_t *table
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1876,8 +1876,7 @@ def mode(self):
"""
Returns the mode(s) of the Categorical.

Empty if nothing occurs at least 2 times. Always returns `Categorical`
even if only one value.
Always returns `Categorical` even if only one value.

Returns
-------
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5168,9 +5168,8 @@ def _get_agg_axis(self, axis_num):

def mode(self, axis=0, numeric_only=False):
"""
Gets the mode(s) of each element along the axis selected. Empty if
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in this (or followup), we should make this a shared doc-string :>

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback — not sure how to do this, I reckon this is to reference the same-ish doc string in all relevant places?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, you would move def mode to generic.py, leaving a stub here and in series for an @Appender doc-string (look at virtually any other doc-string, e.g. fillna)

can do this as a follow-up though (or here, up to you)

nothing has 2+ occurrences. Adds a row for each mode per label, fills
in gaps with nan.
Gets the mode(s) of each element along the axis selected. Adds a row
for each mode per label, fills in gaps with nan.

Note that there could be multiple values returned for the selected
axis (when more than one item share the maximum frequency), which is
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,8 +1192,7 @@ def count(self, level=None):
def mode(self):
"""Return the mode(s) of the dataset.

Empty if nothing occurs at least 2 times. Always returns Series even
if only one value is returned.
Always returns Series even if only one value is returned.

Returns
-------
Expand Down
29 changes: 9 additions & 20 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,18 +809,18 @@ def test_mode(self):
"E": [8, 8, 1, 1, 3, 3]})
tm.assert_frame_equal(df[["A"]].mode(),
pd.DataFrame({"A": [12]}))
expected = pd.Series([], dtype='int64', name='D').to_frame()
expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').to_frame()
tm.assert_frame_equal(df[["D"]].mode(), expected)
expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame()
tm.assert_frame_equal(df[["E"]].mode(), expected)
tm.assert_frame_equal(df[["A", "B"]].mode(),
pd.DataFrame({"A": [12], "B": [10.]}))
tm.assert_frame_equal(df.mode(),
pd.DataFrame({"A": [12, np.nan, np.nan],
"B": [10, np.nan, np.nan],
"C": [8, 9, np.nan],
"D": [np.nan, np.nan, np.nan],
"E": [1, 3, 8]}))
pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, np.nan, np.nan],
"B": [10, np.nan, np.nan, np.nan, np.nan, np.nan],
"C": [8, 9, np.nan, np.nan, np.nan, np.nan],
"D": [0, 1, 2, 3, 4, 5],
"E": [1, 3, 8, np.nan, np.nan, np.nan]}))

# outputs in sorted order
df["C"] = list(reversed(df["C"]))
Expand All @@ -837,20 +837,9 @@ def test_mode(self):
df = pd.DataFrame({"A": np.arange(6, dtype='int64'),
"B": pd.date_range('2011', periods=6),
"C": list('abcdef')})
exp = pd.DataFrame({"A": pd.Series([], dtype=df["A"].dtype),
"B": pd.Series([], dtype=df["B"].dtype),
"C": pd.Series([], dtype=df["C"].dtype)})
tm.assert_frame_equal(df.mode(), exp)

# and also when not empty
df.loc[1, "A"] = 0
df.loc[4, "B"] = df.loc[3, "B"]
df.loc[5, "C"] = 'e'
exp = pd.DataFrame({"A": pd.Series([0], dtype=df["A"].dtype),
"B": pd.Series([df.loc[3, "B"]],
dtype=df["B"].dtype),
"C": pd.Series(['e'], dtype=df["C"].dtype)})

exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'), dtype=df["A"].dtype),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you have flake8 issues

git diff master | flake8 --diff

see here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed all but the ones in the hashtable_func_helper.pxi.in — for some reason it's complaining about lines that I haven't even touched / don't show up in the diffs here on GH

"B": pd.Series(pd.date_range('2011', periods=6), dtype=df["B"].dtype),
"C": pd.Series(list('abcdef'), dtype=df["C"].dtype)})
tm.assert_frame_equal(df.mode(), exp)

def test_operators_timedelta64(self):
Expand Down
13 changes: 7 additions & 6 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,10 @@ def test_mode(self):
exp = Series([], dtype=np.float64)
tm.assert_series_equal(Series([]).mode(), exp)

exp = Series([], dtype=np.int64)
exp = Series([1], dtype=np.int64)
tm.assert_series_equal(Series([1]).mode(), exp)

exp = Series([], dtype=np.object)
exp = Series(['a','b','c'], dtype=np.object)
tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp)

# Test numerical data types.
Expand Down Expand Up @@ -169,7 +169,8 @@ def test_mode(self):
tm.assert_series_equal(s.mode(), exp)

# Test datetime types.
exp = Series([], dtype="M8[ns]")
exp = Series(['1900-05-03', '2011-01-03',
'2013-01-02'], dtype='M8[ns]')
s = Series(['2011-01-03', '2013-01-02',
'1900-05-03'], dtype='M8[ns]')
tm.assert_series_equal(s.mode(), exp)
Expand All @@ -180,7 +181,7 @@ def test_mode(self):
tm.assert_series_equal(s.mode(), exp)

# gh-5986: Test timedelta types.
exp = Series([], dtype='timedelta64[ns]')
exp = Series(['-1 days', '0 days', '1 days'], dtype='timedelta64[ns]')
s = Series(['1 days', '-1 days', '0 days'],
dtype='timedelta64[ns]')
tm.assert_series_equal(s.mode(), exp)
Expand All @@ -200,13 +201,13 @@ def test_mode(self):
s = Series([1, 2**63, 2**63], dtype=np.uint64)
tm.assert_series_equal(s.mode(), exp)

exp = Series([], dtype=np.uint64)
exp = Series([1, 2**63], dtype=np.uint64)
s = Series([1, 2**63], dtype=np.uint64)
tm.assert_series_equal(s.mode(), exp)

# Test category dtype.
c = Categorical([1, 2])
exp = Categorical([], categories=[1, 2])
exp = Categorical([1, 2], categories=[1, 2])
exp = Series(exp, dtype='category')
tm.assert_series_equal(Series(c).mode(), exp)

Expand Down
34 changes: 27 additions & 7 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,12 +1256,30 @@ def test_no_mode(self):
exp = Series([], dtype=np.float64)
tm.assert_series_equal(algos.mode([]), exp)

exp = Series([], dtype=np.int)
def test_mode_single(self):
exp_single = [1]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number as a comment

data_single = [1]

for dt in np.typecodes['AllInteger'] + np.typecodes['Float']:
s = Series(data_single, dtype=dt)
exp = Series(exp_single, dtype=dt)
tm.assert_series_equal(algos.mode(s), exp)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add the result for a double-but-same value

e.g.

data = [1, 1] (exp should be the same)

exp = Series([1], dtype=np.int)
tm.assert_series_equal(algos.mode([1]), exp)

exp = Series([], dtype=np.object)
exp = Series(['a', 'b', 'c'], dtype=np.object)
tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp)

def test_mode_single(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks duplicated

exp_single = [1]
data_single = [1]

for dt in np.typecodes['AllInteger'] + np.typecodes['Float']:
s = Series(data_single, dtype=dt)
exp = Series(exp_single, dtype=dt)
tm.assert_series_equal(algos.mode(s), exp)

def test_number_mode(self):
exp_single = [1]
data_single = [1] * 5 + [2] * 3
Expand Down Expand Up @@ -1295,7 +1313,8 @@ def test_strobj_mode(self):
tm.assert_series_equal(algos.mode(s), exp)

def test_datelike_mode(self):
exp = Series([], dtype="M8[ns]")
exp = Series(['1900-05-03', '2011-01-03',
'2013-01-02'], dtype="M8[ns]")
s = Series(['2011-01-03', '2013-01-02',
'1900-05-03'], dtype='M8[ns]')
tm.assert_series_equal(algos.mode(s), exp)
Expand All @@ -1306,7 +1325,8 @@ def test_datelike_mode(self):
tm.assert_series_equal(algos.mode(s), exp)

def test_timedelta_mode(self):
exp = Series([], dtype='timedelta64[ns]')
exp = Series(['-1 days', '0 days', '1 days'],
dtype='timedelta64[ns]')
s = Series(['1 days', '-1 days', '0 days'],
dtype='timedelta64[ns]')
tm.assert_series_equal(algos.mode(s), exp)
Expand All @@ -1326,13 +1346,13 @@ def test_uint64_overflow(self):
s = Series([1, 2**63, 2**63], dtype=np.uint64)
tm.assert_series_equal(algos.mode(s), exp)

exp = Series([], dtype=np.uint64)
exp = Series([1, 2**63], dtype=np.uint64)
s = Series([1, 2**63], dtype=np.uint64)
tm.assert_series_equal(algos.mode(s), exp)

def test_categorical(self):
c = Categorical([1, 2])
exp = Series([], dtype=np.int64)
exp = Series([1, 2], dtype=np.int64)
tm.assert_series_equal(algos.mode(c), exp)

c = Categorical([1, 'a', 'a'])
Expand All @@ -1345,7 +1365,7 @@ def test_categorical(self):

def test_index(self):
idx = Index([1, 2, 3])
exp = Series([], dtype=np.int64)
exp = Series([1, 2, 3], dtype=np.int64)
tm.assert_series_equal(algos.mode(idx), exp)

idx = Index([1, 'a', 'a'])
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,13 +1372,13 @@ def test_mode(self):
s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1],
ordered=True)
res = s.mode()
exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)
exp = Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True)
tm.assert_categorical_equal(res, exp)
# NaN should not become the mode!
s = Categorical([np.nan, np.nan, np.nan, 4, 5],
categories=[5, 4, 3, 2, 1], ordered=True)
res = s.mode()
exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)
exp = Categorical([5, 4], categories=[5, 4, 3, 2, 1], ordered=True)
tm.assert_categorical_equal(res, exp)
s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4],
categories=[5, 4, 3, 2, 1], ordered=True)
Expand Down Expand Up @@ -2980,7 +2980,7 @@ def test_mode(self):
s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1],
ordered=True))
res = s.mode()
exp = Series(Categorical([], categories=[5, 4, 3, 2, 1], ordered=True))
exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True))
tm.assert_series_equal(res, exp)

def test_value_counts(self):
Expand Down