BUG: Return mode even if single value (#15714)

Robin · jreback · commit de589c23e0ae · 2017-03-29T19:48:58.000-04:00
Author: Robin <robin.bartholdson@formulate.se>. This patch had conflicts when merged, resolved by Committer: Jeff Reback <jeff@reback.net>. Closes #15744 from buyology/issue-15714-fix-mode and squashes the following commits: 8c08cd5 [Robin] Added multi-test and whatsnew note; 5f36395 [Robin] Fixed flake issues, removed duplicate test, inserted GH issue number reference; 5f829e1 [Robin] Merge conflict; 0e2dec0 [Robin] Fixed tests; 26db131 [Robin] Return mode even if single value (#15714); 44dbbb2 [Robin] Return mode even if single value (#15714)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -1050,6 +1050,7 @@ Bug Fixes
 
 - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`)
 - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`)
+- Bug in ``.mode()`` where the mode was not returned if there was only a single value (:issue:`15714`)
 
 - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError``  (use ``matplotlib >= 2.0.1``) (:issue:`9351`)
 - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -309,7 +309,7 @@ def mode_{{dtype}}(ndarray[{{ctype}}] values):
 def mode_{{dtype}}({{ctype}}[:] values):
 {{endif}}
     cdef:
-        int count, max_count = 2
+        int count, max_count = 1
         int j = -1 # so you can do +=
         Py_ssize_t k
         kh_{{table_type}}_t *table
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -1868,8 +1868,7 @@ def mode(self):
         """
         Returns the mode(s) of the Categorical.
 
-        Empty if nothing occurs at least 2 times.  Always returns `Categorical`
-        even if only one value.
+        Always returns `Categorical` even if only one value.
 
         Returns
         -------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5127,9 +5127,8 @@ def _get_agg_axis(self, axis_num):
 
     def mode(self, axis=0, numeric_only=False):
         """
-        Gets the mode(s) of each element along the axis selected. Empty if
-        nothing has 2+ occurrences. Adds a row for each mode per label, fills
-        in gaps with nan.
+        Gets the mode(s) of each element along the axis selected. Adds a row
+        for each mode per label, fills in gaps with nan.
 
         Note that there could be multiple values returned for the selected
         axis (when more than one item share the maximum frequency), which is
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1192,8 +1192,7 @@ def count(self, level=None):
     def mode(self):
         """Return the mode(s) of the dataset.
 
-        Empty if nothing occurs at least 2 times. Always returns Series even
-        if only one value is returned.
+        Always returns Series even if only one value is returned.
 
         Returns
         -------
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -789,18 +789,23 @@ def test_mode(self):
                            "E": [8, 8, 1, 1, 3, 3]})
         tm.assert_frame_equal(df[["A"]].mode(),
                               pd.DataFrame({"A": [12]}))
-        expected = pd.Series([], dtype='int64', name='D').to_frame()
+        expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\
+            to_frame()
         tm.assert_frame_equal(df[["D"]].mode(), expected)
         expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame()
         tm.assert_frame_equal(df[["E"]].mode(), expected)
         tm.assert_frame_equal(df[["A", "B"]].mode(),
                               pd.DataFrame({"A": [12], "B": [10.]}))
         tm.assert_frame_equal(df.mode(),
-                              pd.DataFrame({"A": [12, np.nan, np.nan],
-                                            "B": [10, np.nan, np.nan],
-                                            "C": [8, 9, np.nan],
-                                            "D": [np.nan, np.nan, np.nan],
-                                            "E": [1, 3, 8]}))
+                              pd.DataFrame({"A": [12, np.nan, np.nan, np.nan,
+                                                  np.nan, np.nan],
+                                            "B": [10, np.nan, np.nan, np.nan,
+                                                  np.nan, np.nan],
+                                            "C": [8, 9, np.nan, np.nan, np.nan,
+                                                  np.nan],
+                                            "D": [0, 1, 2, 3, 4, 5],
+                                            "E": [1, 3, 8, np.nan, np.nan,
+                                                  np.nan]}))
 
         # outputs in sorted order
         df["C"] = list(reversed(df["C"]))
@@ -817,20 +822,12 @@ def test_mode(self):
         df = pd.DataFrame({"A": np.arange(6, dtype='int64'),
                            "B": pd.date_range('2011', periods=6),
                            "C": list('abcdef')})
-        exp = pd.DataFrame({"A": pd.Series([], dtype=df["A"].dtype),
-                            "B": pd.Series([], dtype=df["B"].dtype),
-                            "C": pd.Series([], dtype=df["C"].dtype)})
-        tm.assert_frame_equal(df.mode(), exp)
-
-        # and also when not empty
-        df.loc[1, "A"] = 0
-        df.loc[4, "B"] = df.loc[3, "B"]
-        df.loc[5, "C"] = 'e'
-        exp = pd.DataFrame({"A": pd.Series([0], dtype=df["A"].dtype),
-                            "B": pd.Series([df.loc[3, "B"]],
+        exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'),
+                                           dtype=df["A"].dtype),
+                            "B": pd.Series(pd.date_range('2011', periods=6),
                                            dtype=df["B"].dtype),
-                            "C": pd.Series(['e'], dtype=df["C"].dtype)})
-
+                            "C": pd.Series(list('abcdef'),
+                                           dtype=df["C"].dtype)})
         tm.assert_frame_equal(df.mode(), exp)
 
     def test_operators_timedelta64(self):
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -130,10 +130,10 @@ def test_mode(self):
         exp = Series([], dtype=np.float64)
         tm.assert_series_equal(Series([]).mode(), exp)
 
-        exp = Series([], dtype=np.int64)
+        exp = Series([1], dtype=np.int64)
         tm.assert_series_equal(Series([1]).mode(), exp)
 
-        exp = Series([], dtype=np.object)
+        exp = Series(['a', 'b', 'c'], dtype=np.object)
         tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp)
 
         # Test numerical data types.
@@ -169,7 +169,8 @@ def test_mode(self):
             tm.assert_series_equal(s.mode(), exp)
 
         # Test datetime types.
-        exp = Series([], dtype="M8[ns]")
+        exp = Series(['1900-05-03', '2011-01-03',
+                      '2013-01-02'], dtype='M8[ns]')
         s = Series(['2011-01-03', '2013-01-02',
                     '1900-05-03'], dtype='M8[ns]')
         tm.assert_series_equal(s.mode(), exp)
@@ -180,7 +181,7 @@ def test_mode(self):
         tm.assert_series_equal(s.mode(), exp)
 
         # gh-5986: Test timedelta types.
-        exp = Series([], dtype='timedelta64[ns]')
+        exp = Series(['-1 days', '0 days', '1 days'], dtype='timedelta64[ns]')
         s = Series(['1 days', '-1 days', '0 days'],
                    dtype='timedelta64[ns]')
         tm.assert_series_equal(s.mode(), exp)
@@ -200,13 +201,13 @@ def test_mode(self):
         s = Series([1, 2**63, 2**63], dtype=np.uint64)
         tm.assert_series_equal(s.mode(), exp)
 
-        exp = Series([], dtype=np.uint64)
+        exp = Series([1, 2**63], dtype=np.uint64)
         s = Series([1, 2**63], dtype=np.uint64)
         tm.assert_series_equal(s.mode(), exp)
 
         # Test category dtype.
         c = Categorical([1, 2])
-        exp = Categorical([], categories=[1, 2])
+        exp = Categorical([1, 2], categories=[1, 2])
         exp = Series(exp, dtype='category')
         tm.assert_series_equal(Series(c).mode(), exp)
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -1261,10 +1261,27 @@ def test_no_mode(self):
         exp = Series([], dtype=np.float64)
         tm.assert_series_equal(algos.mode([]), exp)
 
-        exp = Series([], dtype=np.int)
+    # GH 15714
+    def test_mode_single(self):
+        exp_single = [1]
+        data_single = [1]
+
+        exp_multi = [1]
+        data_multi = [1, 1]
+
+        for dt in np.typecodes['AllInteger'] + np.typecodes['Float']:
+            s = Series(data_single, dtype=dt)
+            exp = Series(exp_single, dtype=dt)
+            tm.assert_series_equal(algos.mode(s), exp)
+
+            s = Series(data_multi, dtype=dt)
+            exp = Series(exp_multi, dtype=dt)
+            tm.assert_series_equal(algos.mode(s), exp)
+
+        exp = Series([1], dtype=np.int)
         tm.assert_series_equal(algos.mode([1]), exp)
 
-        exp = Series([], dtype=np.object)
+        exp = Series(['a', 'b', 'c'], dtype=np.object)
         tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp)
 
     def test_number_mode(self):
@@ -1300,7 +1317,8 @@ def test_strobj_mode(self):
             tm.assert_series_equal(algos.mode(s), exp)
 
     def test_datelike_mode(self):
-        exp = Series([], dtype="M8[ns]")
+        exp = Series(['1900-05-03', '2011-01-03',
+                      '2013-01-02'], dtype="M8[ns]")
         s = Series(['2011-01-03', '2013-01-02',
                     '1900-05-03'], dtype='M8[ns]')
         tm.assert_series_equal(algos.mode(s), exp)
@@ -1311,7 +1329,8 @@ def test_datelike_mode(self):
         tm.assert_series_equal(algos.mode(s), exp)
 
     def test_timedelta_mode(self):
-        exp = Series([], dtype='timedelta64[ns]')
+        exp = Series(['-1 days', '0 days', '1 days'],
+                     dtype='timedelta64[ns]')
         s = Series(['1 days', '-1 days', '0 days'],
                    dtype='timedelta64[ns]')
         tm.assert_series_equal(algos.mode(s), exp)
@@ -1331,13 +1350,13 @@ def test_uint64_overflow(self):
         s = Series([1, 2**63, 2**63], dtype=np.uint64)
         tm.assert_series_equal(algos.mode(s), exp)
 
-        exp = Series([], dtype=np.uint64)
+        exp = Series([1, 2**63], dtype=np.uint64)
         s = Series([1, 2**63], dtype=np.uint64)
         tm.assert_series_equal(algos.mode(s), exp)
 
     def test_categorical(self):
         c = Categorical([1, 2])
-        exp = Series([], dtype=np.int64)
+        exp = Series([1, 2], dtype=np.int64)
         tm.assert_series_equal(algos.mode(c), exp)
 
         c = Categorical([1, 'a', 'a'])
@@ -1350,7 +1369,7 @@ def test_categorical(self):
 
     def test_index(self):
         idx = Index([1, 2, 3])
-        exp = Series([], dtype=np.int64)
+        exp = Series([1, 2, 3], dtype=np.int64)
         tm.assert_series_equal(algos.mode(idx), exp)
 
         idx = Index([1, 'a', 'a'])
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -1279,13 +1279,13 @@ def test_mode(self):
         s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1],
                         ordered=True)
         res = s.mode()
-        exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)
+        exp = Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True)
         tm.assert_categorical_equal(res, exp)
         # NaN should not become the mode!
         s = Categorical([np.nan, np.nan, np.nan, 4, 5],
                         categories=[5, 4, 3, 2, 1], ordered=True)
         res = s.mode()
-        exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)
+        exp = Categorical([5, 4], categories=[5, 4, 3, 2, 1], ordered=True)
         tm.assert_categorical_equal(res, exp)
         s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4],
                         categories=[5, 4, 3, 2, 1], ordered=True)
@@ -2833,7 +2833,7 @@ def test_mode(self):
         s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1],
                                ordered=True))
         res = s.mode()
-        exp = Series(Categorical([], categories=[5, 4, 3, 2, 1], ordered=True))
+        exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True))
         tm.assert_series_equal(res, exp)
 
     def test_value_counts(self):