ENH: making mode stable/keeping original ordering #39353

Merged
merged 6 commits on Jan 27, 2021

Changes from 2 commits
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
@@ -359,7 +359,7 @@ Reshaping
- Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`)
- :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
- Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
- :meth:`Series.value_counts` returns keys in original order (:issue:`12679`, :issue:`11227`)
- :meth:`Series.value_counts` and :meth:`Series.mode` return consistent keys in original order (:issue:`12679`, :issue:`11227` and :issue:`39007`)
- Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`)
-

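To make the whatsnew entry above concrete, here is a rough illustration (not part of the PR; the data and the expected output are assumptions based on the ordering the entry describes):

import pandas as pd

s = pd.Series(["b", "a", "c", "b", "a", "c"])  # every value occurs twice
# With the stable ordering described above, tied keys should keep their
# order of first appearance rather than an arbitrary hash-table order.
print(s.value_counts().index.tolist())  # expected: ['b', 'a', 'c']
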
136 changes: 32 additions & 104 deletions pandas/_libs/hashtable_func_helper.pxi.in
@@ -28,52 +28,6 @@ dtypes = [('Complex128', 'complex128', 'complex128',
{{for name, dtype, ttype, c_type, to_c_type in dtypes}}


@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cdef build_count_table_{{dtype}}(ndarray[{{dtype}}] values,
kh_{{ttype}}_t *table, bint dropna):
{{else}}
cdef build_count_table_{{dtype}}(const {{dtype}}_t[:] values,
kh_{{ttype}}_t *table, bint dropna):
{{endif}}
cdef:
khiter_t k
Py_ssize_t i, n = len(values)

{{c_type}} val

int ret = 0

{{if dtype == 'object'}}
kh_resize_{{ttype}}(table, n // 10)

for i in range(n):
val = values[i]
if not checknull(val) or not dropna:
k = kh_get_{{ttype}}(table, <PyObject*>val)
if k != table.n_buckets:
table.vals[k] += 1
else:
k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
table.vals[k] = 1
{{else}}
with nogil:
kh_resize_{{ttype}}(table, n)

for i in range(n):
val = {{to_c_type}}(values[i])

if not is_nan_{{c_type}}(val) or not dropna:
k = kh_get_{{ttype}}(table, val)
if k != table.n_buckets:
table.vals[k] += 1
else:
k = kh_put_{{ttype}}(table, val, &ret)
table.vals[k] = 1
{{endif}}


@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
@@ -294,78 +248,42 @@ def ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
kh_destroy_{{ttype}}(table)
return result.view(np.bool_)

{{endfor}}


# ----------------------------------------------------------------------
# Mode Computations
# ----------------------------------------------------------------------

{{py:

# dtype, ctype, table_type, npy_dtype
dtypes = [('complex128', 'khcomplex128_t', 'complex128', 'complex128'),
('complex64', 'khcomplex64_t', 'complex64', 'complex64'),
('float64', 'float64_t', 'float64', 'float64'),
('float32', 'float32_t', 'float32', 'float32'),
('int64', 'int64_t', 'int64', 'int64'),
('int32', 'int32_t', 'int32', 'int32'),
('int16', 'int16_t', 'int16', 'int16'),
('int8', 'int8_t', 'int8', 'int8'),
('uint64', 'uint64_t', 'uint64', 'uint64'),
('uint32', 'uint32_t', 'uint32', 'uint32'),
('uint16', 'uint16_t', 'uint16', 'uint16'),
('uint8', 'uint8_t', 'uint8', 'uint8'),
('object', 'object', 'pymap', 'object_')]
}}

{{for dtype, ctype, table_type, npy_dtype in dtypes}}


@cython.wraparound(False)
@cython.boundscheck(False)

{{if dtype == 'object'}}


def mode_{{dtype}}(ndarray[{{ctype}}] values, bint dropna):
def mode_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
{{else}}


def mode_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
{{endif}}
cdef:
int count, max_count = 1
int j = -1 # so you can do +=
# Don't use Py_ssize_t, since table.n_buckets is unsigned
khiter_t k
kh_{{table_type}}_t *table
ndarray[{{ctype}}] modes
{{if dtype == 'object'}}
ndarray[{{dtype}}] keys
ndarray[{{dtype}}] modes
{{else}}
{{dtype}}_t[:] keys
ndarray[{{dtype}}_t] modes
{{endif}}
int64_t[:] counts
int64_t count, max_count = -1
Py_ssize_t k, j = 0

table = kh_init_{{table_type}}()
build_count_table_{{dtype}}(values, table, dropna)
keys, counts = value_count_{{dtype}}(values, dropna)

modes = np.empty(table.n_buckets, dtype=np.{{npy_dtype}})
{{if dtype == 'object'}}
modes = np.empty(len(keys), dtype=np.object_)
{{else}}
modes = np.empty(len(keys), dtype=np.{{dtype}})
{{endif}}

{{if dtype != 'object'}}
with nogil:
for k in range(table.n_buckets):
if kh_exist_{{table_type}}(table, k):
count = table.vals[k]
if count == max_count:
j += 1
elif count > max_count:
max_count = count
j = 0
else:
continue

modes[j] = table.keys[k]
{{else}}
for k in range(table.n_buckets):
if kh_exist_{{table_type}}(table, k):
count = table.vals[k]

for k in range(len(keys)):
count = counts[k]
if count == max_count:
j += 1
elif count > max_count:
@@ -374,11 +292,21 @@
else:
continue

modes[j] = <object>table.keys[k]
modes[j] = keys[k]
{{else}}
for k in range(len(keys)):
count = counts[k]
if count == max_count:
j += 1
elif count > max_count:
max_count = count
j = 0
else:
continue

modes[j] = keys[k]
{{endif}}

kh_destroy_{{table_type}}(table)

return modes[:j + 1]

{{endfor}}
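
In plain Python, the new mode loop above amounts to the following sketch (illustration only — the real code iterates over the keys/counts returned by value_count_{{dtype}}, writes into a preallocated array, and releases the GIL for non-object dtypes):

def stable_mode(values, dropna=False):
    # Rough stand-in for value_count_{{dtype}}: a dict keeps keys in order of
    # first appearance (guaranteed for Python 3.7+ dicts).
    counts = {}
    for v in values:
        if dropna and v is None:  # crude NA check, for the sketch only
            continue
        counts[v] = counts.get(v, 0) + 1

    # Same scan as the Cython loop: a strictly larger count restarts the list
    # of modes, an equal count appends, a smaller count is skipped.
    modes, max_count = [], -1
    for key, count in counts.items():
        if count > max_count:
            max_count = count
            modes = [key]
        elif count == max_count:
            modes.append(key)
    return modes


print(stable_mode([2, 1, 1, 2, 5]))  # [2, 1] -- ties keep first-appearance order
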
19 changes: 19 additions & 0 deletions pandas/tests/libs/test_hashtable.py
@@ -5,7 +5,9 @@
import pytest

from pandas._libs import hashtable as ht
from pandas._libs.missing import checknull

import pandas as pd
import pandas._testing as tm


@@ -323,6 +325,23 @@ def test_mode(self, dtype, type_suffix, writable):
result = mode(values, False)
assert result == 42

def test_mode_stable(self, dtype, type_suffix, writable):
mode = get_ht_function("mode", type_suffix)
values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
values.flags.writeable = writable
keys = mode(values, False)
tm.assert_numpy_array_equal(keys, values)


def test_modes_with_nans():
# GH39007
values = np.array([True, pd.NA, np.nan], dtype=np.object_)
# pd.NA and np.nan will have the same representative: np.nan
# thus we have 2 nans and 1 True
modes = ht.mode_object(values, False)
assert modes.size == 1
assert checknull(modes[0])
Contributor

can you use is_na instead of the internal checknull



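For context, a hedged sketch of how the test above is expected to surface at the Series level (assuming Series.mode routes object data through mode_object as in this PR; the exact output is not taken from the PR):

import numpy as np
import pandas as pd

s = pd.Series([True, pd.NA, np.nan], dtype=object)
# pd.NA and np.nan both count as missing, so with dropna=False the two
# nulls together (count 2) outnumber True (count 1).
print(s.mode(dropna=False))  # expected: a single-element Series holding a null
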
@pytest.mark.parametrize(
"dtype, type_suffix",