Skip to content

Commit de589c2

Browse files
Robinjreback
Robin
authored and committed
BUG: Return mode even if single value (#15714)
Author: Robin <[email protected]> This patch had conflicts when merged, resolved by Committer: Jeff Reback <[email protected]> Closes #15744 from buyology/issue-15714-fix-mode and squashes the following commits: 8c08cd5 [Robin] Added multi-test and whatsnew note 5f36395 [Robin] Fixed flake issues, removed duplicate test, inserted GH issue number reference 5f829e1 [Robin] Merge conflict 0e2dec0 [Robin] Fixed tests 26db131 [Robin] Return mode even if single value (#15714) 44dbbb2 [Robin] Return mode even if single value (#15714)
1 parent 0ab0813 commit de589c2

File tree

9 files changed

+58
-43
lines changed

9 files changed

+58
-43
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,7 @@ Bug Fixes
10501050

10511051
- Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`)
10521052
- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`)
1053+
- Bug in ``.mode()`` where ``mode`` was not returned if there was only a single value (:issue:`15714`)
10531054

10541055
- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`)
10551056
- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)

pandas/_libs/hashtable_func_helper.pxi.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def mode_{{dtype}}(ndarray[{{ctype}}] values):
309309
def mode_{{dtype}}({{ctype}}[:] values):
310310
{{endif}}
311311
cdef:
312-
int count, max_count = 2
312+
int count, max_count = 1
313313
int j = -1 # so you can do +=
314314
Py_ssize_t k
315315
kh_{{table_type}}_t *table

pandas/core/categorical.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1868,8 +1868,7 @@ def mode(self):
18681868
"""
18691869
Returns the mode(s) of the Categorical.
18701870
1871-
Empty if nothing occurs at least 2 times. Always returns `Categorical`
1872-
even if only one value.
1871+
Always returns `Categorical` even if only one value.
18731872
18741873
Returns
18751874
-------

pandas/core/frame.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -5127,9 +5127,8 @@ def _get_agg_axis(self, axis_num):
51275127

51285128
def mode(self, axis=0, numeric_only=False):
51295129
"""
5130-
Gets the mode(s) of each element along the axis selected. Empty if
5131-
nothing has 2+ occurrences. Adds a row for each mode per label, fills
5132-
in gaps with nan.
5130+
Gets the mode(s) of each element along the axis selected. Adds a row
5131+
for each mode per label, fills in gaps with nan.
51335132
51345133
Note that there could be multiple values returned for the selected
51355134
axis (when more than one item share the maximum frequency), which is

pandas/core/series.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1192,8 +1192,7 @@ def count(self, level=None):
11921192
def mode(self):
11931193
"""Return the mode(s) of the dataset.
11941194
1195-
Empty if nothing occurs at least 2 times. Always returns Series even
1196-
if only one value is returned.
1195+
Always returns Series even if only one value is returned.
11971196
11981197
Returns
11991198
-------

pandas/tests/frame/test_analytics.py

+16-19
Original file line numberDiff line numberDiff line change
@@ -789,18 +789,23 @@ def test_mode(self):
789789
"E": [8, 8, 1, 1, 3, 3]})
790790
tm.assert_frame_equal(df[["A"]].mode(),
791791
pd.DataFrame({"A": [12]}))
792-
expected = pd.Series([], dtype='int64', name='D').to_frame()
792+
expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\
793+
to_frame()
793794
tm.assert_frame_equal(df[["D"]].mode(), expected)
794795
expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame()
795796
tm.assert_frame_equal(df[["E"]].mode(), expected)
796797
tm.assert_frame_equal(df[["A", "B"]].mode(),
797798
pd.DataFrame({"A": [12], "B": [10.]}))
798799
tm.assert_frame_equal(df.mode(),
799-
pd.DataFrame({"A": [12, np.nan, np.nan],
800-
"B": [10, np.nan, np.nan],
801-
"C": [8, 9, np.nan],
802-
"D": [np.nan, np.nan, np.nan],
803-
"E": [1, 3, 8]}))
800+
pd.DataFrame({"A": [12, np.nan, np.nan, np.nan,
801+
np.nan, np.nan],
802+
"B": [10, np.nan, np.nan, np.nan,
803+
np.nan, np.nan],
804+
"C": [8, 9, np.nan, np.nan, np.nan,
805+
np.nan],
806+
"D": [0, 1, 2, 3, 4, 5],
807+
"E": [1, 3, 8, np.nan, np.nan,
808+
np.nan]}))
804809

805810
# outputs in sorted order
806811
df["C"] = list(reversed(df["C"]))
@@ -817,20 +822,12 @@ def test_mode(self):
817822
df = pd.DataFrame({"A": np.arange(6, dtype='int64'),
818823
"B": pd.date_range('2011', periods=6),
819824
"C": list('abcdef')})
820-
exp = pd.DataFrame({"A": pd.Series([], dtype=df["A"].dtype),
821-
"B": pd.Series([], dtype=df["B"].dtype),
822-
"C": pd.Series([], dtype=df["C"].dtype)})
823-
tm.assert_frame_equal(df.mode(), exp)
824-
825-
# and also when not empty
826-
df.loc[1, "A"] = 0
827-
df.loc[4, "B"] = df.loc[3, "B"]
828-
df.loc[5, "C"] = 'e'
829-
exp = pd.DataFrame({"A": pd.Series([0], dtype=df["A"].dtype),
830-
"B": pd.Series([df.loc[3, "B"]],
825+
exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'),
826+
dtype=df["A"].dtype),
827+
"B": pd.Series(pd.date_range('2011', periods=6),
831828
dtype=df["B"].dtype),
832-
"C": pd.Series(['e'], dtype=df["C"].dtype)})
833-
829+
"C": pd.Series(list('abcdef'),
830+
dtype=df["C"].dtype)})
834831
tm.assert_frame_equal(df.mode(), exp)
835832

836833
def test_operators_timedelta64(self):

pandas/tests/series/test_analytics.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,10 @@ def test_mode(self):
130130
exp = Series([], dtype=np.float64)
131131
tm.assert_series_equal(Series([]).mode(), exp)
132132

133-
exp = Series([], dtype=np.int64)
133+
exp = Series([1], dtype=np.int64)
134134
tm.assert_series_equal(Series([1]).mode(), exp)
135135

136-
exp = Series([], dtype=np.object)
136+
exp = Series(['a', 'b', 'c'], dtype=np.object)
137137
tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp)
138138

139139
# Test numerical data types.
@@ -169,7 +169,8 @@ def test_mode(self):
169169
tm.assert_series_equal(s.mode(), exp)
170170

171171
# Test datetime types.
172-
exp = Series([], dtype="M8[ns]")
172+
exp = Series(['1900-05-03', '2011-01-03',
173+
'2013-01-02'], dtype='M8[ns]')
173174
s = Series(['2011-01-03', '2013-01-02',
174175
'1900-05-03'], dtype='M8[ns]')
175176
tm.assert_series_equal(s.mode(), exp)
@@ -180,7 +181,7 @@ def test_mode(self):
180181
tm.assert_series_equal(s.mode(), exp)
181182

182183
# gh-5986: Test timedelta types.
183-
exp = Series([], dtype='timedelta64[ns]')
184+
exp = Series(['-1 days', '0 days', '1 days'], dtype='timedelta64[ns]')
184185
s = Series(['1 days', '-1 days', '0 days'],
185186
dtype='timedelta64[ns]')
186187
tm.assert_series_equal(s.mode(), exp)
@@ -200,13 +201,13 @@ def test_mode(self):
200201
s = Series([1, 2**63, 2**63], dtype=np.uint64)
201202
tm.assert_series_equal(s.mode(), exp)
202203

203-
exp = Series([], dtype=np.uint64)
204+
exp = Series([1, 2**63], dtype=np.uint64)
204205
s = Series([1, 2**63], dtype=np.uint64)
205206
tm.assert_series_equal(s.mode(), exp)
206207

207208
# Test category dtype.
208209
c = Categorical([1, 2])
209-
exp = Categorical([], categories=[1, 2])
210+
exp = Categorical([1, 2], categories=[1, 2])
210211
exp = Series(exp, dtype='category')
211212
tm.assert_series_equal(Series(c).mode(), exp)
212213

pandas/tests/test_algos.py

+26-7
Original file line numberDiff line numberDiff line change
@@ -1261,10 +1261,27 @@ def test_no_mode(self):
12611261
exp = Series([], dtype=np.float64)
12621262
tm.assert_series_equal(algos.mode([]), exp)
12631263

1264-
exp = Series([], dtype=np.int)
1264+
# GH 15714
1265+
def test_mode_single(self):
1266+
exp_single = [1]
1267+
data_single = [1]
1268+
1269+
exp_multi = [1]
1270+
data_multi = [1, 1]
1271+
1272+
for dt in np.typecodes['AllInteger'] + np.typecodes['Float']:
1273+
s = Series(data_single, dtype=dt)
1274+
exp = Series(exp_single, dtype=dt)
1275+
tm.assert_series_equal(algos.mode(s), exp)
1276+
1277+
s = Series(data_multi, dtype=dt)
1278+
exp = Series(exp_multi, dtype=dt)
1279+
tm.assert_series_equal(algos.mode(s), exp)
1280+
1281+
exp = Series([1], dtype=np.int)
12651282
tm.assert_series_equal(algos.mode([1]), exp)
12661283

1267-
exp = Series([], dtype=np.object)
1284+
exp = Series(['a', 'b', 'c'], dtype=np.object)
12681285
tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp)
12691286

12701287
def test_number_mode(self):
@@ -1300,7 +1317,8 @@ def test_strobj_mode(self):
13001317
tm.assert_series_equal(algos.mode(s), exp)
13011318

13021319
def test_datelike_mode(self):
1303-
exp = Series([], dtype="M8[ns]")
1320+
exp = Series(['1900-05-03', '2011-01-03',
1321+
'2013-01-02'], dtype="M8[ns]")
13041322
s = Series(['2011-01-03', '2013-01-02',
13051323
'1900-05-03'], dtype='M8[ns]')
13061324
tm.assert_series_equal(algos.mode(s), exp)
@@ -1311,7 +1329,8 @@ def test_datelike_mode(self):
13111329
tm.assert_series_equal(algos.mode(s), exp)
13121330

13131331
def test_timedelta_mode(self):
1314-
exp = Series([], dtype='timedelta64[ns]')
1332+
exp = Series(['-1 days', '0 days', '1 days'],
1333+
dtype='timedelta64[ns]')
13151334
s = Series(['1 days', '-1 days', '0 days'],
13161335
dtype='timedelta64[ns]')
13171336
tm.assert_series_equal(algos.mode(s), exp)
@@ -1331,13 +1350,13 @@ def test_uint64_overflow(self):
13311350
s = Series([1, 2**63, 2**63], dtype=np.uint64)
13321351
tm.assert_series_equal(algos.mode(s), exp)
13331352

1334-
exp = Series([], dtype=np.uint64)
1353+
exp = Series([1, 2**63], dtype=np.uint64)
13351354
s = Series([1, 2**63], dtype=np.uint64)
13361355
tm.assert_series_equal(algos.mode(s), exp)
13371356

13381357
def test_categorical(self):
13391358
c = Categorical([1, 2])
1340-
exp = Series([], dtype=np.int64)
1359+
exp = Series([1, 2], dtype=np.int64)
13411360
tm.assert_series_equal(algos.mode(c), exp)
13421361

13431362
c = Categorical([1, 'a', 'a'])
@@ -1350,7 +1369,7 @@ def test_categorical(self):
13501369

13511370
def test_index(self):
13521371
idx = Index([1, 2, 3])
1353-
exp = Series([], dtype=np.int64)
1372+
exp = Series([1, 2, 3], dtype=np.int64)
13541373
tm.assert_series_equal(algos.mode(idx), exp)
13551374

13561375
idx = Index([1, 'a', 'a'])

pandas/tests/test_categorical.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1279,13 +1279,13 @@ def test_mode(self):
12791279
s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1],
12801280
ordered=True)
12811281
res = s.mode()
1282-
exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)
1282+
exp = Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True)
12831283
tm.assert_categorical_equal(res, exp)
12841284
# NaN should not become the mode!
12851285
s = Categorical([np.nan, np.nan, np.nan, 4, 5],
12861286
categories=[5, 4, 3, 2, 1], ordered=True)
12871287
res = s.mode()
1288-
exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)
1288+
exp = Categorical([5, 4], categories=[5, 4, 3, 2, 1], ordered=True)
12891289
tm.assert_categorical_equal(res, exp)
12901290
s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4],
12911291
categories=[5, 4, 3, 2, 1], ordered=True)
@@ -2833,7 +2833,7 @@ def test_mode(self):
28332833
s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1],
28342834
ordered=True))
28352835
res = s.mode()
2836-
exp = Series(Categorical([], categories=[5, 4, 3, 2, 1], ordered=True))
2836+
exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True))
28372837
tm.assert_series_equal(res, exp)
28382838

28392839
def test_value_counts(self):

0 commit comments

Comments
 (0)