diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index e3ab8b243e25a..3214b21133b72 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -284,16 +284,29 @@ def time_clip(self, n):
 
 class ValueCounts:
 
-    params = ["int", "uint", "float", "object"]
-    param_names = ["dtype"]
+    params = [[10 ** 3, 10 ** 4, 10 ** 5], ["int", "uint", "float", "object"]]
+    param_names = ["N", "dtype"]
 
-    def setup(self, dtype):
-        self.s = Series(np.random.randint(0, 1000, size=100000)).astype(dtype)
+    def setup(self, N, dtype):
+        self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)
 
-    def time_value_counts(self, dtype):
+    def time_value_counts(self, N, dtype):
         self.s.value_counts()
 
 
+class Mode:
+
+    params = [[10 ** 3, 10 ** 4, 10 ** 5], ["int", "uint", "float", "object"]]
+    param_names = ["N", "dtype"]
+
+    def setup(self, N, dtype):
+        np.random.seed(42)
+        self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)
+
+    def time_mode(self, N, dtype):
+        self.s.mode()
+
+
 class Dir:
     def setup(self):
         self.s = Series(index=tm.makeStringIndex(10000))
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 381a05a18b278..284371efb05b2 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -359,7 +359,7 @@ Reshaping
 - Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`)
 - :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
 - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
-- :meth:`Series.value_counts` returns keys in original order (:issue:`12679`, :issue:`11227`)
+- :meth:`Series.value_counts` and :meth:`Series.mode` return consistent keys in original order (:issue:`12679`, :issue:`11227` and :issue:`39007`)
 - Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`)
 -
 
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index b4da5a3c7fb09..4684ecb8716c0 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -28,52 +28,6 @@ dtypes = [('Complex128', 'complex128', 'complex128',
 {{for name, dtype, ttype, c_type, to_c_type in dtypes}}
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-{{if dtype == 'object'}}
-cdef build_count_table_{{dtype}}(ndarray[{{dtype}}] values,
-                                 kh_{{ttype}}_t *table, bint dropna):
-{{else}}
-cdef build_count_table_{{dtype}}(const {{dtype}}_t[:] values,
-                                 kh_{{ttype}}_t *table, bint dropna):
-{{endif}}
-    cdef:
-        khiter_t k
-        Py_ssize_t i, n = len(values)
-
-        {{c_type}} val
-
-        int ret = 0
-
-    {{if dtype == 'object'}}
-    kh_resize_{{ttype}}(table, n // 10)
-
-    for i in range(n):
-        val = values[i]
-        if not checknull(val) or not dropna:
-            k = kh_get_{{ttype}}(table, <PyObject*>val)
-            if k != table.n_buckets:
-                table.vals[k] += 1
-            else:
-                k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
-                table.vals[k] = 1
-    {{else}}
-    with nogil:
-        kh_resize_{{ttype}}(table, n)
-
-        for i in range(n):
-            val = {{to_c_type}}(values[i])
-
-            if not is_nan_{{c_type}}(val) or not dropna:
-                k = kh_get_{{ttype}}(table, val)
-                if k != table.n_buckets:
-                    table.vals[k] += 1
-                else:
-                    k = kh_put_{{ttype}}(table, val, &ret)
-                    table.vals[k] = 1
-    {{endif}}
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 {{if dtype == 'object'}}
@@ -84,8 +38,6 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
     cdef:
         Py_ssize_t i = 0
         Py_ssize_t n = len(values)
-        size_t unique_key_index = 0
-        size_t unique_key_count = 0
         kh_{{ttype}}_t *table
 
         # Don't use Py_ssize_t, since table.n_buckets is unsigned
@@ -98,12 +50,10 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 
     # we track the order in which keys are first seen (GH39009),
     # khash-map isn't insertion-ordered, thus:
-    #    table maps key to index_of_appearence
-    #    result_keys maps index_of_appearence to key
-    #    result_counts maps index_of_appearence to number of elements
+    #    table maps keys to counts
+    #    result_keys remembers the original order of keys
 
     result_keys = {{name}}Vector()
-    result_counts = Int64Vector()
     table = kh_init_{{ttype}}()
 
     {{if dtype == 'object'}}
@@ -118,14 +68,11 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
                 val = navalue
             k = kh_get_{{ttype}}(table, <PyObject*>val)
             if k != table.n_buckets:
-                unique_key_index = table.vals[k]
-                result_counts.data.data[unique_key_index] += 1
+                table.vals[k] += 1
             else:
                 k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
-                table.vals[k] = unique_key_count
+                table.vals[k] = 1
                 result_keys.append(val)
-                result_counts.append(1)
-                unique_key_count+=1
     {{else}}
     kh_resize_{{ttype}}(table, n)
 
@@ -135,19 +82,26 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
         if not is_nan_{{c_type}}(val) or not dropna:
             k = kh_get_{{ttype}}(table, val)
             if k != table.n_buckets:
-                unique_key_index = table.vals[k]
-                result_counts.data.data[unique_key_index] += 1
+                table.vals[k] += 1
             else:
                 k = kh_put_{{ttype}}(table, val, &ret)
-                table.vals[k] = unique_key_count
+                table.vals[k] = 1
                 result_keys.append(val)
-                result_counts.append(1)
-                unique_key_count+=1
     {{endif}}
 
+    # collect counts in the order corresponding to result_keys:
+    cdef int64_t[:] result_counts = np.empty(table.size, dtype=np.int64)
+    for i in range(table.size):
+        {{if dtype == 'object'}}
+        k = kh_get_{{ttype}}(table, result_keys.data[i])
+        {{else}}
+        k = kh_get_{{ttype}}(table, result_keys.data.data[i])
+        {{endif}}
+        result_counts[i] = table.vals[k]
+
     kh_destroy_{{ttype}}(table)
 
-    return result_keys.to_array(), result_counts.to_array()
+    return result_keys.to_array(), result_counts.base
 
 
 @cython.wraparound(False)
@@ -294,78 +248,42 @@ def ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
     kh_destroy_{{ttype}}(table)
     return result.view(np.bool_)
 
-{{endfor}}
-
-
 # ----------------------------------------------------------------------
 # Mode Computations
 # ----------------------------------------------------------------------
 
-{{py:
-
-# dtype, ctype, table_type, npy_dtype
-dtypes = [('complex128', 'khcomplex128_t', 'complex128', 'complex128'),
-          ('complex64', 'khcomplex64_t', 'complex64', 'complex64'),
-          ('float64', 'float64_t', 'float64', 'float64'),
-          ('float32', 'float32_t', 'float32', 'float32'),
-          ('int64', 'int64_t', 'int64', 'int64'),
-          ('int32', 'int32_t', 'int32', 'int32'),
-          ('int16', 'int16_t', 'int16', 'int16'),
-          ('int8', 'int8_t', 'int8', 'int8'),
-          ('uint64', 'uint64_t', 'uint64', 'uint64'),
-          ('uint32', 'uint32_t', 'uint32', 'uint32'),
-          ('uint16', 'uint16_t', 'uint16', 'uint16'),
-          ('uint8', 'uint8_t', 'uint8', 'uint8'),
-          ('object', 'object', 'pymap', 'object_')]
-}}
-
-{{for dtype, ctype, table_type, npy_dtype in dtypes}}
-
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-
 {{if dtype == 'object'}}
-
-
-def mode_{{dtype}}(ndarray[{{ctype}}] values, bint dropna):
+def mode_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
 {{else}}
-
-
 def mode_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 {{endif}}
     cdef:
-        int count, max_count = 1
-        int j = -1  # so you can do +=
-        # Don't use Py_ssize_t, since table.n_buckets is unsigned
-        khiter_t k
-        kh_{{table_type}}_t *table
-        ndarray[{{ctype}}] modes
+        {{if dtype == 'object'}}
+        ndarray[{{dtype}}] keys
+        ndarray[{{dtype}}] modes
+        {{else}}
+        {{dtype}}_t[:] keys
+        ndarray[{{dtype}}_t] modes
+        {{endif}}
+        int64_t[:] counts
+        int64_t count, max_count = -1
+        Py_ssize_t k, j = 0
 
-    table = kh_init_{{table_type}}()
-    build_count_table_{{dtype}}(values, table, dropna)
+    keys, counts = value_count_{{dtype}}(values, dropna)
 
-    modes = np.empty(table.n_buckets, dtype=np.{{npy_dtype}})
+    {{if dtype == 'object'}}
+    modes = np.empty(len(keys), dtype=np.object_)
+    {{else}}
+    modes = np.empty(len(keys), dtype=np.{{dtype}})
+    {{endif}}
 
     {{if dtype != 'object'}}
     with nogil:
-        for k in range(table.n_buckets):
-            if kh_exist_{{table_type}}(table, k):
-                count = table.vals[k]
-                if count == max_count:
-                    j += 1
-                elif count > max_count:
-                    max_count = count
-                    j = 0
-                else:
-                    continue
-
-                modes[j] = table.keys[k]
-    {{else}}
-    for k in range(table.n_buckets):
-        if kh_exist_{{table_type}}(table, k):
-            count = table.vals[k]
-
+        for k in range(len(keys)):
+            count = counts[k]
             if count == max_count:
                 j += 1
             elif count > max_count:
@@ -374,11 +292,21 @@ def mode_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
             else:
                 continue
 
-            modes[j] = <object>table.keys[k]
+            modes[j] = keys[k]
+    {{else}}
+    for k in range(len(keys)):
+        count = counts[k]
+        if count == max_count:
+            j += 1
+        elif count > max_count:
+            max_count = count
+            j = 0
+        else:
+            continue
+
+        modes[j] = keys[k]
     {{endif}}
 
-    kh_destroy_{{table_type}}(table)
-
     return modes[:j + 1]
 
 {{endfor}}
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 5bf652c206a5f..8393312004241 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -6,6 +6,7 @@
 
 from pandas._libs import hashtable as ht
 
+import pandas as pd
 import pandas._testing as tm
 
 
@@ -323,6 +324,23 @@ def test_mode(self, dtype, type_suffix, writable):
         result = mode(values, False)
         assert result == 42
 
+    def test_mode_stable(self, dtype, type_suffix, writable):
+        mode = get_ht_function("mode", type_suffix)
+        values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
+        values.flags.writeable = writable
+        keys = mode(values, False)
+        tm.assert_numpy_array_equal(keys, values)
+
+
+def test_modes_with_nans():
+    # GH39007
+    values = np.array([True, pd.NA, np.nan], dtype=np.object_)
+    # pd.Na and np.nan will have the same representative: np.nan
+    # thus we have 2 nans and 1 True
+    modes = ht.mode_object(values, False)
+    assert modes.size == 1
+    assert np.isnan(modes[0])
+
 
 @pytest.mark.parametrize(
     "dtype, type_suffix",