From 18050cb897e75ed430ddb8440d061244f81f08ad Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 5 Jan 2021 01:04:19 +0100
Subject: [PATCH 01/14] introducing stable value_counts function

---
 pandas/_libs/hashtable_func_helper.pxi.in | 85 +++++++++++++++--------
 1 file changed, 55 insertions(+), 30 deletions(-)

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index f8f541235dcb7..0335acb86591b 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -77,54 +77,79 @@ cdef build_count_table_{{dtype}}(const {{dtype}}_t[:] values,
 @cython.wraparound(False)
 @cython.boundscheck(False)
 {{if dtype == 'object'}}
-cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
+cpdef stable_value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
 {{else}}
-cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
+cpdef stable_value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 {{endif}}
     cdef:
         Py_ssize_t i = 0
+        Py_ssize_t n = len(values)
+        size_t unique_key_index = 0
+        size_t unique_key_count = 0
         kh_{{ttype}}_t *table
 
-        {{if dtype != 'object'}}
-        {{dtype}}_t[:] result_keys
-        int64_t[:] result_counts
-        {{endif}}
-
         # Don't use Py_ssize_t, since table.n_buckets is unsigned
         khiter_t k
 
-    table = kh_init_{{ttype}}()
-    {{if dtype == 'object'}}
-    build_count_table_{{dtype}}(values, table, 1)
+        {{c_type}} val
+
+        int ret = 0
+
+    {{if dtype[0]!='u'}}
+    result_keys = {{dtype.title()}}Vector()
     {{else}}
-    build_count_table_{{dtype}}(values, table, dropna)
+    result_keys = {{'U'+dtype[1::].title()}}Vector()
     {{endif}}
-
-    result_keys = np.empty(table.n_occupied, '{{dtype}}')
-    result_counts = np.zeros(table.n_occupied, dtype=np.int64)
+    result_counts = Int64Vector()
+    table = kh_init_{{ttype}}()
 
     {{if dtype == 'object'}}
-    for k in range(table.n_buckets):
-        if kh_exist_{{ttype}}(table, k):
-            result_keys[i] = <{{dtype}}>table.keys[k]
-            result_counts[i] = table.vals[k]
-            i += 1
+    kh_resize_{{ttype}}(table, n // 10)
+
+    for i in range(n):
+        val = values[i]
+        if not checknull(val) or not dropna:
+            k = kh_get_{{ttype}}(table, <PyObject*>val)
+            if k != table.n_buckets:
+                unique_key_index = table.vals[k]
+                result_counts.data.data[unique_key_index] += 1
+            else:
+                k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
+                table.vals[k] = unique_key_count
+                result_keys.append(val)
+                result_counts.append(1)
+                unique_key_count+=1
     {{else}}
-    with nogil:
-        for k in range(table.n_buckets):
-            if kh_exist_{{ttype}}(table, k):
-                result_keys[i] = {{to_dtype}}(table.keys[k])
-                result_counts[i] = table.vals[k]
-                i += 1
+    kh_resize_{{ttype}}(table, n)
+
+    for i in range(n):
+        val = {{to_c_type}}(values[i])
+
+        if not is_nan_{{c_type}}(val) or not dropna:
+            k = kh_get_{{ttype}}(table, val)
+            if k != table.n_buckets:
+                unique_key_index = table.vals[k]
+                result_counts.data.data[unique_key_index] += 1
+            else:
+                k = kh_put_{{ttype}}(table, val, &ret)
+                table.vals[k] = unique_key_count
+                result_keys.append(val)
+                result_counts.append(1)
+                unique_key_count+=1
     {{endif}}
 
     kh_destroy_{{ttype}}(table)
 
-    {{if dtype == 'object'}}
-    return result_keys, result_counts
-    {{else}}
-    return np.asarray(result_keys), np.asarray(result_counts)
-    {{endif}}
+    return result_keys.to_array(), result_counts.to_array()
+
+
+{{if dtype == 'object'}}
+cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
+    return stable_value_count_{{dtype}}(values, 1)
+{{else}}
+cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
+    return stable_value_count_{{dtype}}(values, dropna)
+{{endif}}
 
 
 @cython.wraparound(False)

From bfa7e570e4db2ec85dd6ac8070f661329136186e Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 5 Jan 2021 23:17:05 +0100
Subject: [PATCH 02/14] cleaning up: remove no longer needed functions

---
 pandas/_libs/hashtable_class_helper.pxi.in |  7 -----
 pandas/_libs/hashtable_func_helper.pxi.in  | 30 +++++++++++-----------
 2 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 276f162545399..a3e72ed858392 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -19,13 +19,6 @@ cdef kh{{name}}_t to_kh{{name}}_t({{name}}_t val) nogil:
     res.imag = val.imag
     return res
 
-
-cdef {{name}}_t to_{{name}}(kh{{name}}_t val) nogil:
-    cdef {{name}}_t res
-    res.real = val.real
-    res.imag = val.imag
-    return res
-
 {{endfor}}
 
 
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 0335acb86591b..9c4c4fea22c5b 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -6,26 +6,26 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 {{py:
 
-# dtype, ttype, c_type, to_c_type, to_dtype
+# dtype, ttype, c_type, to_c_type
 dtypes = [('complex128', 'complex128', 'khcomplex128_t',
-                         'to_khcomplex128_t', 'to_complex128'),
+                         'to_khcomplex128_t'),
           ('complex64', 'complex64', 'khcomplex64_t',
-                        'to_khcomplex64_t', 'to_complex64'),
-          ('float64', 'float64', 'float64_t', '', ''),
-          ('float32', 'float32', 'float32_t', '', ''),
-          ('uint64', 'uint64', 'uint64_t', '', ''),
-          ('uint32', 'uint32', 'uint32_t', '', ''),
-          ('uint16', 'uint16', 'uint16_t', '', ''),
-          ('uint8', 'uint8', 'uint8_t', '', ''),
-          ('object', 'pymap', 'object', '', ''),
-          ('int64', 'int64', 'int64_t', '', ''),
-          ('int32', 'int32', 'int32_t', '', ''),
-          ('int16', 'int16', 'int16_t', '', ''),
-          ('int8', 'int8', 'int8_t', '', '')]
+                        'to_khcomplex64_t'),
+          ('float64', 'float64', 'float64_t', ''),
+          ('float32', 'float32', 'float32_t', ''),
+          ('uint64', 'uint64', 'uint64_t', ''),
+          ('uint32', 'uint32', 'uint32_t', ''),
+          ('uint16', 'uint16', 'uint16_t', ''),
+          ('uint8', 'uint8', 'uint8_t', ''),
+          ('object', 'pymap', 'object', ''),
+          ('int64', 'int64', 'int64_t', ''),
+          ('int32', 'int32', 'int32_t', ''),
+          ('int16', 'int16', 'int16_t', ''),
+          ('int8', 'int8', 'int8_t', '')]
 
 }}
 
-{{for dtype, ttype, c_type, to_c_type, to_dtype in dtypes}}
+{{for dtype, ttype, c_type, to_c_type in dtypes}}
 
 
 @cython.wraparound(False)

From 773774b0dae2fa385b3a8e28b053be284f2f78c7 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 5 Jan 2021 23:23:11 +0100
Subject: [PATCH 03/14] the order of keys is stable now  (the same as in the
 input), fix test cases

---
 pandas/tests/arrays/boolean/test_function.py     | 6 +++---
 pandas/tests/arrays/test_datetimes.py            | 2 +-
 pandas/tests/series/methods/test_value_counts.py | 2 +-
 pandas/tests/test_algos.py                       | 8 +++-----
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py
index 0f8743489b412..d90655b6e2820 100644
--- a/pandas/tests/arrays/boolean/test_function.py
+++ b/pandas/tests/arrays/boolean/test_function.py
@@ -77,18 +77,18 @@ def test_ufunc_reduce_raises(values):
 def test_value_counts_na():
     arr = pd.array([True, False, pd.NA], dtype="boolean")
     result = arr.value_counts(dropna=False)
-    expected = pd.Series([1, 1, 1], index=[False, True, pd.NA], dtype="Int64")
+    expected = pd.Series([1, 1, 1], index=[True, False, pd.NA], dtype="Int64")
     tm.assert_series_equal(result, expected)
 
     result = arr.value_counts(dropna=True)
-    expected = pd.Series([1, 1], index=[False, True], dtype="Int64")
+    expected = pd.Series([1, 1], index=[True, False], dtype="Int64")
     tm.assert_series_equal(result, expected)
 
 
 def test_value_counts_with_normalize():
     s = pd.Series([True, False, pd.NA], dtype="boolean")
     result = s.value_counts(normalize=True)
-    expected = pd.Series([1, 1], index=[False, True], dtype="Float64") / 2
+    expected = pd.Series([1, 1], index=[True, False], dtype="Float64") / 2
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index ea44e5d477fc6..587d3c466c631 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -288,7 +288,7 @@ def test_value_counts_preserves_tz(self):
 
         arr[-2] = pd.NaT
         result = arr.value_counts()
-        expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]])
+        expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT])
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("method", ["pad", "backfill"])
diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py
index f22b1be672190..2bed9940d1ebd 100644
--- a/pandas/tests/series/methods/test_value_counts.py
+++ b/pandas/tests/series/methods/test_value_counts.py
@@ -195,7 +195,7 @@ def test_value_counts_categorical_with_nan(self):
             (
                 Series(range(3), index=[True, False, np.nan]).index,
                 False,
-                Series([1, 1, 1], index=[pd.NA, False, True]),
+                Series([1, 1, 1], index=[np.nan, True, False]),
             ),
         ],
     )
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index fb982c02acd99..88757b96085aa 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -6,7 +6,7 @@
 import pytest
 
 from pandas._libs import algos as libalgos, hashtable as ht
-from pandas.compat import IS64, np_array_datetime64_compat
+from pandas.compat import np_array_datetime64_compat
 import pandas.util._test_decorators as td
 
 from pandas.core.dtypes.common import (
@@ -1272,12 +1272,10 @@ def test_value_counts_uint64(self):
         tm.assert_series_equal(result, expected)
 
         arr = np.array([-1, 2 ** 63], dtype=object)
-        expected = Series([1, 1], index=[2 ** 63, -1])
+        expected = Series([1, 1], index=[-1, 2 ** 63])
         result = algos.value_counts(arr)
 
-        # 32-bit linux has a different ordering
-        if IS64:
-            tm.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
 
 class TestDuplicated:

From d96118f89a42def6ebebc607a06633fdc798ad2d Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 5 Jan 2021 23:28:42 +0100
Subject: [PATCH 04/14] adding a test testing explicitly for stability

---
 pandas/tests/libs/test_hashtable.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 4f650807afd30..ec01009b23954 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -272,6 +272,14 @@ def test_value_count(self, dtype, type_suffix, writable):
         tm.assert_numpy_array_equal(np.sort(keys), expected)
         assert np.all(counts == 5)
 
+    def test_value_count_stable(self, dtype, type_suffix, writable):
+        value_count = get_ht_function("value_count", type_suffix)
+        values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
+        values.flags.writeable = writable
+        keys, counts = value_count(values, False)
+        tm.assert_numpy_array_equal(keys, values)
+        assert np.all(counts == 1)
+
     def test_duplicated_first(self, dtype, type_suffix, writable):
         N = 100
         duplicated = get_ht_function("duplicated", type_suffix)

From ca68f30cc19a3f83c685d68084ad7aa8f6a4b187 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Wed, 6 Jan 2021 00:11:58 +0100
Subject: [PATCH 05/14] fixing docstring tests

---
 pandas/core/base.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 631f67ced77dd..09e2e80f45b3d 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1004,9 +1004,9 @@ def value_counts(
         >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
         >>> index.value_counts()
         3.0    2
+        1.0    1
         2.0    1
         4.0    1
-        1.0    1
         dtype: int64
 
         With `normalize` set to `True`, returns the relative frequency by
@@ -1015,9 +1015,9 @@ def value_counts(
         >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
         >>> s.value_counts(normalize=True)
         3.0    0.4
+        1.0    0.2
         2.0    0.2
         4.0    0.2
-        1.0    0.2
         dtype: float64
 
         **bins**
@@ -1039,10 +1039,10 @@ def value_counts(
 
         >>> s.value_counts(dropna=False)
         3.0    2
+        1.0    1
         2.0    1
-        NaN    1
         4.0    1
-        1.0    1
+        NaN    1
         dtype: int64
         """
         return value_counts(

From 7fd91530b616a13645ab0e4b74912b81d2bbf902 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 8 Jan 2021 22:14:22 +0100
Subject: [PATCH 06/14] keep original order also for null-objects for
 dtype=np.object

---
 pandas/_libs/hashtable_func_helper.pxi.in | 19 +++++++------------
 pandas/core/algorithms.py                 |  5 -----
 2 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 9c4c4fea22c5b..31e67b8bd57a4 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -77,9 +77,9 @@ cdef build_count_table_{{dtype}}(const {{dtype}}_t[:] values,
 @cython.wraparound(False)
 @cython.boundscheck(False)
 {{if dtype == 'object'}}
-cpdef stable_value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
+cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna, navalue=np.NaN):
 {{else}}
-cpdef stable_value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
+cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 {{endif}}
     cdef:
         Py_ssize_t i = 0
@@ -90,6 +90,7 @@ cpdef stable_value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 
         # Don't use Py_ssize_t, since table.n_buckets is unsigned
         khiter_t k
+        bint is_null
 
         {{c_type}} val
 
@@ -108,7 +109,10 @@ cpdef stable_value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 
     for i in range(n):
         val = values[i]
-        if not checknull(val) or not dropna:
+        is_null = checknull(val)
+        if is_null:
+            val = navalue
+        if not is_null or not dropna:
             k = kh_get_{{ttype}}(table, <PyObject*>val)
             if k != table.n_buckets:
                 unique_key_index = table.vals[k]
@@ -143,15 +147,6 @@ cpdef stable_value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
     return result_keys.to_array(), result_counts.to_array()
 
 
-{{if dtype == 'object'}}
-cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
-    return stable_value_count_{{dtype}}(values, 1)
-{{else}}
-cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
-    return stable_value_count_{{dtype}}(values, dropna)
-{{endif}}
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 {{if dtype == 'object'}}
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index ed7ae75117c5c..968b39088e684 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -866,11 +866,6 @@ def value_counts_arraylike(values, dropna: bool):
         f = getattr(htable, f"value_count_{ndtype}")
         keys, counts = f(values, dropna)
 
-        mask = isna(values)
-        if not dropna and mask.any() and not isna(keys).any():
-            keys = np.insert(keys, 0, np.NaN)
-            counts = np.insert(counts, 0, mask.sum())
-
     keys = _reconstruct_data(keys, original.dtype, original)
 
     return keys, counts

From 9962c3a1c0c2b38dff57b22044715ca6746f0cb2 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 8 Jan 2021 22:20:15 +0100
Subject: [PATCH 07/14] fixing test cases where the position of NaN has changed

---
 pandas/tests/arrays/string_/test_string.py       | 2 +-
 pandas/tests/series/methods/test_value_counts.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 5365929213503..d14de990d8268 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -497,7 +497,7 @@ def test_value_counts_na(dtype, request):
 
     arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)
     result = arr.value_counts(dropna=False)
-    expected = pd.Series([2, 1, 1], index=["a", pd.NA, "b"], dtype="Int64")
+    expected = pd.Series([2, 1, 1], index=["a", "b", pd.NA], dtype="Int64")
     tm.assert_series_equal(result, expected)
 
     result = arr.value_counts(dropna=True)
diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py
index 2bed9940d1ebd..505b879660ff1 100644
--- a/pandas/tests/series/methods/test_value_counts.py
+++ b/pandas/tests/series/methods/test_value_counts.py
@@ -185,7 +185,7 @@ def test_value_counts_categorical_with_nan(self):
             (
                 Series([False, True, True, pd.NA]),
                 False,
-                Series([2, 1, 1], index=[True, pd.NA, False]),
+                Series([2, 1, 1], index=[True, False, pd.NA]),
             ),
             (
                 Series([False, True, True, pd.NA]),
@@ -195,7 +195,7 @@ def test_value_counts_categorical_with_nan(self):
             (
                 Series(range(3), index=[True, False, np.nan]).index,
                 False,
-                Series([1, 1, 1], index=[np.nan, True, False]),
+                Series([1, 1, 1], index=[True, False, np.nan]),
             ),
         ],
     )

From 679044ba2437c8ff2595182347a45381fb371406 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Thu, 14 Jan 2021 22:21:31 +0100
Subject: [PATCH 08/14] introduce name rather than creating XXXVector-names

---
 pandas/_libs/hashtable_func_helper.pxi.in | 40 ++++++++++-------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 31e67b8bd57a4..aead3a2e97dd4 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -6,26 +6,26 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 {{py:
 
-# dtype, ttype, c_type, to_c_type
-dtypes = [('complex128', 'complex128', 'khcomplex128_t',
-                         'to_khcomplex128_t'),
-          ('complex64', 'complex64', 'khcomplex64_t',
-                        'to_khcomplex64_t'),
-          ('float64', 'float64', 'float64_t', ''),
-          ('float32', 'float32', 'float32_t', ''),
-          ('uint64', 'uint64', 'uint64_t', ''),
-          ('uint32', 'uint32', 'uint32_t', ''),
-          ('uint16', 'uint16', 'uint16_t', ''),
-          ('uint8', 'uint8', 'uint8_t', ''),
-          ('object', 'pymap', 'object', ''),
-          ('int64', 'int64', 'int64_t', ''),
-          ('int32', 'int32', 'int32_t', ''),
-          ('int16', 'int16', 'int16_t', ''),
-          ('int8', 'int8', 'int8_t', '')]
+# name, dtype, ttype, c_type, to_c_type
+dtypes = [('Complex128', 'complex128', 'complex128',
+                         'khcomplex128_t', 'to_khcomplex128_t'),
+          ('Complex64', 'complex64', 'complex64',
+                        'khcomplex64_t', 'to_khcomplex64_t'),
+          ('Float64', 'float64', 'float64', 'float64_t', ''),
+          ('Float32', 'float32', 'float32', 'float32_t', ''),
+          ('UInt64', 'uint64', 'uint64', 'uint64_t', ''),
+          ('UInt32', 'uint32', 'uint32', 'uint32_t', ''),
+          ('UInt16', 'uint16', 'uint16', 'uint16_t', ''),
+          ('UInt8', 'uint8', 'uint8', 'uint8_t', ''),
+          ('Object', 'object', 'pymap', 'object', ''),
+          ('Int64', 'int64', 'int64', 'int64_t', ''),
+          ('Int32', 'int32', 'int32', 'int32_t', ''),
+          ('Int16', 'int16', 'int16', 'int16_t', ''),
+          ('Int8', 'int8', 'int8', 'int8_t', '')]
 
 }}
 
-{{for dtype, ttype, c_type, to_c_type in dtypes}}
+{{for name, dtype, ttype, c_type, to_c_type in dtypes}}
 
 
 @cython.wraparound(False)
@@ -96,11 +96,7 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 
         int ret = 0
 
-    {{if dtype[0]!='u'}}
-    result_keys = {{dtype.title()}}Vector()
-    {{else}}
-    result_keys = {{'U'+dtype[1::].title()}}Vector()
-    {{endif}}
+    result_keys = {{name}}Vector()
     result_counts = Int64Vector()
     table = kh_init_{{ttype}}()
 

From 8097e5b73300c2eadb6a7df9ba9a4b9c8ea3b29c Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Thu, 14 Jan 2021 22:30:22 +0100
Subject: [PATCH 09/14] adding explanation

---
 pandas/_libs/hashtable_func_helper.pxi.in | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index aead3a2e97dd4..b236a8db5f63f 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -96,6 +96,12 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 
         int ret = 0
 
+    # we track the order in which keys are first seen (GH39009),
+    # khash-map isn't insertion-ordered, thus:
+    #    table maps key to index_of_appearence
+    #    result_keys maps index_of_appearence to key
+    #    result_counts maps index_of_appearence to number of elements
+
     result_keys = {{name}}Vector()
     result_counts = Int64Vector()
     table = kh_init_{{ttype}}()

From 95f2247cc04a20db42dd5d47c375dbd46358b99b Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Thu, 14 Jan 2021 22:37:12 +0100
Subject: [PATCH 10/14] adding issue-id to comment

---
 pandas/tests/libs/test_hashtable.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index ec01009b23954..5bf652c206a5f 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -273,6 +273,7 @@ def test_value_count(self, dtype, type_suffix, writable):
         assert np.all(counts == 5)
 
     def test_value_count_stable(self, dtype, type_suffix, writable):
+        # GH12679
         value_count = get_ht_function("value_count", type_suffix)
         values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
         values.flags.writeable = writable

From d480b7c8777f29862c92dc95f4f049788eb6f587 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Thu, 14 Jan 2021 22:40:57 +0100
Subject: [PATCH 11/14] adding whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index ff11ebc022ffb..af39e93e3b3d2 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -356,6 +356,7 @@ Reshaping
 - Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`)
 - :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
 - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
+- ``value_counts`` returns keys in original order (:issue:`12679`)
 - Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`)
 -
 

From 90d0c523f464b3b72bfc7cf25102ec099a85035e Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 22 Jan 2021 07:14:56 +0100
Subject: [PATCH 12/14] fixing a newly created test

---
 pandas/tests/frame/methods/test_describe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
index ed1c3fcce378c..15bafb7a835ba 100644
--- a/pandas/tests/frame/methods/test_describe.py
+++ b/pandas/tests/frame/methods/test_describe.py
@@ -371,7 +371,7 @@ def test_describe_does_not_raise_error_for_dictlike_elements(self):
         # GH#32409
         df = DataFrame([{"test": {"a": "1"}}, {"test": {"a": "2"}}])
         expected = DataFrame(
-            {"test": [2, 2, {"a": "2"}, 1]}, index=["count", "unique", "top", "freq"]
+            {"test": [2, 2, {"a": "1"}, 1]}, index=["count", "unique", "top", "freq"]
         )
         result = df.describe()
         tm.assert_frame_equal(result, expected)

From d85b1aa47fed1da8d92b3843bb2736816815aa9d Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 22 Jan 2021 15:56:47 +0100
Subject: [PATCH 13/14] improve whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index af39e93e3b3d2..2234b870ac9c0 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -356,7 +356,7 @@ Reshaping
 - Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`)
 - :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
 - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
-- ``value_counts`` returns keys in original order (:issue:`12679`)
+- :meth:`Series.value_counts` returns keys in original order (:issue:`12679`, :issue:`11227`)
 - Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`)
 -
 

From 230215fe6dccf5cbd2bee427eecf342139dfd037 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 22 Jan 2021 16:03:57 +0100
Subject: [PATCH 14/14] reorder ifs

---
 pandas/_libs/hashtable_func_helper.pxi.in | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index b236a8db5f63f..b4da5a3c7fb09 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -112,9 +112,10 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
     for i in range(n):
         val = values[i]
         is_null = checknull(val)
-        if is_null:
-            val = navalue
         if not is_null or not dropna:
+            # all nas become the same representative:
+            if is_null:
+                val = navalue
             k = kh_get_{{ttype}}(table, <PyObject*>val)
             if k != table.n_buckets:
                 unique_key_index = table.vals[k]