diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 29d05ddcfb497..66de10c84ff36 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -924,3 +924,4 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``.mode()`` where ``mode`` was not returned for a single value (:issue:`15714`) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index fa373905ef08a..cf6b3d9ce5c10 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -233,7 +233,7 @@ def mode_{{dtype}}(ndarray[{{ctype}}] values): def mode_{{dtype}}({{ctype}}[:] values): {{endif}} cdef: - int count, max_count = 2 + int count, max_count = 1 int j = -1 # so you can do += Py_ssize_t k kh_{{table_type}}_t *table diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index af51c7f2e2dc1..579f8fe791e06 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1876,8 +1876,7 @@ def mode(self): """ Returns the mode(s) of the Categorical. - Empty if nothing occurs at least 2 times. Always returns `Categorical` - even if only one value. + Always returns `Categorical` even if only one value. Returns ------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 987eb10101f12..51a39f4b69693 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5168,9 +5168,8 @@ def _get_agg_axis(self, axis_num): def mode(self, axis=0, numeric_only=False): """ - Gets the mode(s) of each element along the axis selected. Empty if - nothing has 2+ occurrences. Adds a row for each mode per label, fills - in gaps with nan. + Gets the mode(s) of each element along the axis selected. Adds a row + for each mode per label, fills in gaps with nan. Note that there could be multiple values returned for the selected axis (when more than one item share the maximum frequency), which is diff --git a/pandas/core/series.py b/pandas/core/series.py index cfa25ca1299eb..2ae98fc77db53 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1192,8 +1192,7 @@ def count(self, level=None): def mode(self): """Return the mode(s) of the dataset. - Empty if nothing occurs at least 2 times. Always returns Series even - if only one value is returned. + Always returns Series even if only one value is returned. Returns ------- diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 6c917444f9f43..dec233a4421d2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -809,18 +809,23 @@ def test_mode(self): "E": [8, 8, 1, 1, 3, 3]}) tm.assert_frame_equal(df[["A"]].mode(), pd.DataFrame({"A": [12]})) - expected = pd.Series([], dtype='int64', name='D').to_frame() + expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\ + to_frame() tm.assert_frame_equal(df[["D"]].mode(), expected) expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() tm.assert_frame_equal(df[["E"]].mode(), expected) tm.assert_frame_equal(df[["A", "B"]].mode(), pd.DataFrame({"A": [12], "B": [10.]})) tm.assert_frame_equal(df.mode(), - pd.DataFrame({"A": [12, np.nan, np.nan], - "B": [10, np.nan, np.nan], - "C": [8, 9, np.nan], - "D": [np.nan, np.nan, np.nan], - "E": [1, 3, 8]})) + pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, + np.nan, np.nan], + "B": [10, np.nan, np.nan, np.nan, + np.nan, np.nan], + "C": [8, 9, np.nan, np.nan, np.nan, + np.nan], + "D": [0, 1, 2, 3, 4, 5], + "E": [1, 3, 8, np.nan, np.nan, + np.nan]})) # outputs in sorted order df["C"] = list(reversed(df["C"])) @@ -837,20 +842,12 @@ def test_mode(self): df = pd.DataFrame({"A": np.arange(6, dtype='int64'), "B": pd.date_range('2011', periods=6), "C": list('abcdef')}) - exp = pd.DataFrame({"A": pd.Series([], dtype=df["A"].dtype), - "B": pd.Series([], dtype=df["B"].dtype), - "C": pd.Series([], dtype=df["C"].dtype)}) - tm.assert_frame_equal(df.mode(), exp) - - # and also when not empty - df.loc[1, "A"] = 0 - df.loc[4, "B"] = df.loc[3, "B"] - df.loc[5, "C"] = 'e' - exp = pd.DataFrame({"A": pd.Series([0], dtype=df["A"].dtype), - "B": pd.Series([df.loc[3, "B"]], + exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'), + dtype=df["A"].dtype), + "B": pd.Series(pd.date_range('2011', periods=6), dtype=df["B"].dtype), - "C": pd.Series(['e'], dtype=df["C"].dtype)}) - + "C": pd.Series(list('abcdef'), + dtype=df["C"].dtype)}) tm.assert_frame_equal(df.mode(), exp) def test_operators_timedelta64(self): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index c2543581dca50..6c607ff580cbb 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -130,10 +130,10 @@ def test_mode(self): exp = Series([], dtype=np.float64) tm.assert_series_equal(Series([]).mode(), exp) - exp = Series([], dtype=np.int64) + exp = Series([1], dtype=np.int64) tm.assert_series_equal(Series([1]).mode(), exp) - exp = Series([], dtype=np.object) + exp = Series(['a', 'b', 'c'], dtype=np.object) tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp) # Test numerical data types. @@ -169,7 +169,8 @@ def test_mode(self): tm.assert_series_equal(s.mode(), exp) # Test datetime types. - exp = Series([], dtype="M8[ns]") + exp = Series(['1900-05-03', '2011-01-03', + '2013-01-02'], dtype='M8[ns]') s = Series(['2011-01-03', '2013-01-02', '1900-05-03'], dtype='M8[ns]') tm.assert_series_equal(s.mode(), exp) @@ -180,7 +181,7 @@ def test_mode(self): tm.assert_series_equal(s.mode(), exp) # gh-5986: Test timedelta types. - exp = Series([], dtype='timedelta64[ns]') + exp = Series(['-1 days', '0 days', '1 days'], dtype='timedelta64[ns]') s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]') tm.assert_series_equal(s.mode(), exp) @@ -200,13 +201,13 @@ def test_mode(self): s = Series([1, 2**63, 2**63], dtype=np.uint64) tm.assert_series_equal(s.mode(), exp) - exp = Series([], dtype=np.uint64) + exp = Series([1, 2**63], dtype=np.uint64) s = Series([1, 2**63], dtype=np.uint64) tm.assert_series_equal(s.mode(), exp) # Test category dtype. c = Categorical([1, 2]) - exp = Categorical([], categories=[1, 2]) + exp = Categorical([1, 2], categories=[1, 2]) exp = Series(exp, dtype='category') tm.assert_series_equal(Series(c).mode(), exp) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 7a3cc3e2c3cd7..39527dcf5ac78 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1256,10 +1256,27 @@ def test_no_mode(self): exp = Series([], dtype=np.float64) tm.assert_series_equal(algos.mode([]), exp) - exp = Series([], dtype=np.int) + # GH 15714 + def test_mode_single(self): + exp_single = [1] + data_single = [1] + + exp_multi = [1] + data_multi = [1, 1] + + for dt in np.typecodes['AllInteger'] + np.typecodes['Float']: + s = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + s = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + exp = Series([1], dtype=np.int) tm.assert_series_equal(algos.mode([1]), exp) - exp = Series([], dtype=np.object) + exp = Series(['a', 'b', 'c'], dtype=np.object) tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp) def test_number_mode(self): @@ -1295,7 +1312,8 @@ def test_strobj_mode(self): tm.assert_series_equal(algos.mode(s), exp) def test_datelike_mode(self): - exp = Series([], dtype="M8[ns]") + exp = Series(['1900-05-03', '2011-01-03', + '2013-01-02'], dtype="M8[ns]") s = Series(['2011-01-03', '2013-01-02', '1900-05-03'], dtype='M8[ns]') tm.assert_series_equal(algos.mode(s), exp) @@ -1306,7 +1324,8 @@ def test_datelike_mode(self): tm.assert_series_equal(algos.mode(s), exp) def test_timedelta_mode(self): - exp = Series([], dtype='timedelta64[ns]') + exp = Series(['-1 days', '0 days', '1 days'], + dtype='timedelta64[ns]') s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]') tm.assert_series_equal(algos.mode(s), exp) @@ -1326,13 +1345,13 @@ def test_uint64_overflow(self): s = Series([1, 2**63, 2**63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) - exp = Series([], dtype=np.uint64) + exp = Series([1, 2**63], dtype=np.uint64) s = Series([1, 2**63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) def test_categorical(self): c = Categorical([1, 2]) - exp = Series([], dtype=np.int64) + exp = Series([1, 2], dtype=np.int64) tm.assert_series_equal(algos.mode(c), exp) c = Categorical([1, 'a', 'a']) @@ -1345,7 +1364,7 @@ def test_categorical(self): def test_index(self): idx = Index([1, 2, 3]) - exp = Series([], dtype=np.int64) + exp = Series([1, 2, 3], dtype=np.int64) tm.assert_series_equal(algos.mode(idx), exp) idx = Index([1, 'a', 'a']) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 6c8aeba704c7b..40cba7ee78b3a 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1372,13 +1372,13 @@ def test_mode(self): s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) + exp = Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True) tm.assert_categorical_equal(res, exp) # NaN should not become the mode! s = Categorical([np.nan, np.nan, np.nan, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) + exp = Categorical([5, 4], categories=[5, 4, 3, 2, 1], ordered=True) tm.assert_categorical_equal(res, exp) s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4], categories=[5, 4, 3, 2, 1], ordered=True) @@ -2980,7 +2980,7 @@ def test_mode(self): s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True)) res = s.mode() - exp = Series(Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)) + exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True)) tm.assert_series_equal(res, exp) def test_value_counts(self):