From c97b677c95a62a83b4d49640217642fa81005e7e Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Mon, 26 Jul 2021 20:38:05 +0200
Subject: [PATCH 01/11] do not mangle nans in value_count

---
 pandas/_libs/hashtable_func_helper.pxi.in | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index fb8ce79a924a4..05cf957a86073 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -31,7 +31,7 @@ dtypes = [('Complex128', 'complex128', 'complex128',
 @cython.wraparound(False)
 @cython.boundscheck(False)
 {{if dtype == 'object'}}
-cdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna, navalue=np.NaN):
+cdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
 {{else}}
 cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 {{endif}}
@@ -63,9 +63,6 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
         val = values[i]
         is_null = checknull(val)
         if not is_null or not dropna:
-            # all nas become the same representative:
-            if is_null:
-                val = navalue
             k = kh_get_{{ttype}}(table, <PyObject*>val)
             if k != table.n_buckets:
                 table.vals[k] += 1

From 34b960b5988b06d067c71949adb1942abdfc2168 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 27 Jul 2021 06:58:10 +0200
Subject: [PATCH 02/11] fix test cases which assumed mangled nans

---
 pandas/tests/base/test_value_counts.py | 2 +-
 pandas/tests/libs/test_hashtable.py    | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
index 5431baf493260..23bb4c5d2670c 100644
--- a/pandas/tests/base/test_value_counts.py
+++ b/pandas/tests/base/test_value_counts.py
@@ -281,5 +281,5 @@ def test_value_counts_with_nan(dropna, index_or_series):
     if dropna is True:
         expected = Series([1], index=[True])
     else:
-        expected = Series([2, 1], index=[pd.NA, True])
+        expected = Series([1, 1, 1], index=[True, pd.NA, np.nan])
     tm.assert_series_equal(res, expected)
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index bdc02ff0aa7a8..3969177a63044 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -453,13 +453,10 @@ def test_mode_stable(self, dtype, writable):
 
 
 def test_modes_with_nans():
-    # GH39007
-    values = np.array([True, pd.NA, np.nan], dtype=np.object_)
-    # pd.Na and np.nan will have the same representative: np.nan
-    # thus we have 2 nans and 1 True
+    # GH42688, nans aren't mangled
+    values = np.array([True, pd.NA, np.nan, pd.NA, np.nan], dtype=np.object_)
     modes = ht.mode(values, False)
-    assert modes.size == 1
-    assert np.isnan(modes[0])
+    assert modes.size == 2
 
 
 def test_unique_label_indices_intp(writable):

From ead1da407a11e7682de8b7ca1a51d18fedbaebf1 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 27 Jul 2021 07:07:15 +0200
Subject: [PATCH 03/11] is_null only if really needed

---
 pandas/_libs/hashtable_func_helper.pxi.in | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 05cf957a86073..e5e64f8dc7b5f 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -42,7 +42,6 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 
         # Don't use Py_ssize_t, since table.n_buckets is unsigned
         khiter_t k
-        bint is_null
 
         {{c_type}} val
 
@@ -61,8 +60,7 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
 
     for i in range(n):
         val = values[i]
-        is_null = checknull(val)
-        if not is_null or not dropna:
+        if not dropna or not checknull(val):
             k = kh_get_{{ttype}}(table, <PyObject*>val)
             if k != table.n_buckets:
                 table.vals[k] += 1

From d53c64c99f352940c117754de24f9136e8ebd66e Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 27 Jul 2021 07:13:43 +0200
Subject: [PATCH 04/11] add asv tests

---
 asv_bench/benchmarks/series_methods.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index 155dd6f8e13a0..ea4d3a54a89e8 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -146,12 +146,24 @@ class ValueCounts:
     param_names = ["N", "dtype"]
 
     def setup(self, N, dtype):
-        self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)
+        self.s = Series(np.random.randint(0, N, size=10 * N)).astype("object")
 
     def time_value_counts(self, N, dtype):
         self.s.value_counts()
 
 
+class ValueCountsObjectDropNAFalse:
+
+    params = [10 ** 3, 10 ** 4, 10 ** 5]
+    param_names = ["N"]
+
+    def setup(self, N):
+        self.s = Series(np.random.randint(0, N, size=10 * N)).astype("object")
+
+    def time_value_counts(self, N):
+        self.s.value_counts(dropna=False)
+
+
 class Mode:
 
     params = [[10 ** 3, 10 ** 4, 10 ** 5], ["int", "uint", "float", "object"]]
@@ -164,6 +176,18 @@ def time_mode(self, N, dtype):
         self.s.mode()
 
 
+class ModeObjectDropNAFalse:
+
+    params = [10 ** 3, 10 ** 4, 10 ** 5]
+    param_names = ["N"]
+
+    def setup(self, N):
+        self.s = Series(np.random.randint(0, N, size=10 * N)).astype("object")
+
+    def time_mode(self, N):
+        self.s.mode(dropna=False)
+
+
 class Dir:
     def setup(self):
         self.s = Series(index=tm.makeStringIndex(10000))

From 5b02c52391268824cb278eba0b61ad7160d2e299 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 27 Jul 2021 09:08:15 +0200
Subject: [PATCH 05/11] fix overlooked test case

---
 pandas/tests/indexing/test_indexing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index a10288b2091ca..49fb953e9342d 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -790,8 +790,8 @@ def test_label_indexing_on_nan(self):
         # GH 32431
         df = Series([1, "{1,2}", 1, None])
         vc = df.value_counts(dropna=False)
-        result1 = vc.loc[np.nan]
-        result2 = vc[np.nan]
+        result1 = vc.loc[None]
+        result2 = vc[None]
 
         expected = 1
         assert result1 == expected

From 94d33785d7bbc91dd579c4134b0d1062f7b06ee9 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 30 Jul 2021 16:35:02 +0200
Subject: [PATCH 06/11] fixing reverting wrong change of asv-test

---
 asv_bench/benchmarks/series_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index ea4d3a54a89e8..d8578ed604ae3 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -146,7 +146,7 @@ class ValueCounts:
     param_names = ["N", "dtype"]
 
     def setup(self, N, dtype):
-        self.s = Series(np.random.randint(0, N, size=10 * N)).astype("object")
+        self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)
 
     def time_value_counts(self, N, dtype):
         self.s.value_counts()

From 55d6f6ef3f3d1d6940de0e7da9581e51b79d8d70 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 30 Jul 2021 16:45:42 +0200
Subject: [PATCH 07/11] adding whatsnew note

---
 doc/source/whatsnew/v1.4.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 99a66c7e5454b..1719d4d73ba10 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -549,6 +549,7 @@ Missing
 ^^^^^^^
 - Bug in :meth:`DataFrame.fillna` with limit and no method ignores axis='columns' or ``axis = 1`` (:issue:`40989`)
 - Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`)
+- :meth:`Series.value_counts` and :meth:`Series.mode` no longer coerce ``None``, ``NaT`` and other null-values to a NaN-value for ``np.object``-dtype. This behavior is now consistent with ``unique``, ``isin`` and others (:issue:`42688`)
 -
 
 MultiIndex

From e3fd597581e4cab60280f89e6139bdeb53b20ee8 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 30 Jul 2021 17:00:02 +0200
Subject: [PATCH 08/11] parametrize test

---
 pandas/tests/indexing/test_indexing.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index 49fb953e9342d..7c7e9f79a77ae 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -786,12 +786,12 @@ def test_no_reference_cycle(self):
         del df
         assert wr() is None
 
-    def test_label_indexing_on_nan(self):
+    def test_label_indexing_on_nan(self, nulls_fixture):
         # GH 32431
-        df = Series([1, "{1,2}", 1, None])
+        df = Series([1, "{1,2}", 1, nulls_fixture])
         vc = df.value_counts(dropna=False)
-        result1 = vc.loc[None]
-        result2 = vc[None]
+        result1 = vc.loc[nulls_fixture]
+        result2 = vc[nulls_fixture]
 
         expected = 1
         assert result1 == expected

From 088b4946fc348f3debdb83e75b72bcfad7279f1f Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 9 Nov 2021 20:53:32 +0100
Subject: [PATCH 09/11] moving whatsnew note to notable fixes

---
 doc/source/whatsnew/v1.4.0.rst | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 1719d4d73ba10..e281de348a853 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -240,6 +240,38 @@ Now the float-dtype is respected. Since the common dtype for these DataFrames is
 
 *New behavior*:
 
+.. ipython:: python
+
+    res
+
+.. _whatsnew_140.notable_bug_fixes.value_counts_and_mode_do_not_coerse_to_nan:
+
+Null-values are no longer coerced to NaN-value in value_counts and mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:meth:`Series.value_counts` and :meth:`Series.mode` no longer coerce ``None``, ``NaT`` and other null-values to a NaN-value for ``np.object``-dtype. This behavior is now consistent with ``unique``, ``isin`` and others (:issue:`42688`).
+
+.. ipython:: python
+
+    s = pd.Series([True, None, pd.NaT, None, pd.NaT, None])
+    res = s.value_counts(dropna=False)
+
+Previously, all null-values were replaced by a NaN-value.
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [3]: res
+    Out[3]:
+    NaN     5
+    True    1
+    dtype: int64
+
+Now null-values are no longer mangled.
+
+*New behavior*:
+
 .. ipython:: python
 
     res
@@ -549,7 +581,6 @@ Missing
 ^^^^^^^
 - Bug in :meth:`DataFrame.fillna` with limit and no method ignores axis='columns' or ``axis = 1`` (:issue:`40989`)
 - Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`)
-- :meth:`Series.value_counts` and :meth:`Series.mode` no longer coerce ``None``, ``NaT`` and other null-values to a NaN-value for ``np.object``-dtype. This behavior is now consistent with ``unique``, ``isin`` and others (:issue:`42688`)
 -
 
 MultiIndex

From 792cb7b483665bc597f8c972dc45666f1c1ac4db Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 9 Nov 2021 21:10:05 +0100
Subject: [PATCH 10/11] adding pd.NaT to tested null-objects

---
 pandas/tests/libs/test_hashtable.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 3969177a63044..937eccf7a0afe 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -454,9 +454,10 @@ def test_mode_stable(self, dtype, writable):
 
 def test_modes_with_nans():
     # GH42688, nans aren't mangled
-    values = np.array([True, pd.NA, np.nan, pd.NA, np.nan], dtype=np.object_)
+    nulls = [pd.NA, np.nan, pd.NaT, None]
+    values = np.array([True] + nulls * 2, dtype=np.object_)
     modes = ht.mode(values, False)
-    assert modes.size == 2
+    assert modes.size == len(nulls)
 
 
 def test_unique_label_indices_intp(writable):

From 89ececeb823cf9c3ffb7612ea8ebe25952f91d1a Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Wed, 10 Nov 2021 10:39:55 +0100
Subject: [PATCH 11/11] fixing title line

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index e281de348a853..c909b58b32add 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -247,7 +247,7 @@ Now the float-dtype is respected. Since the common dtype for these DataFrames is
 .. _whatsnew_140.notable_bug_fixes.value_counts_and_mode_do_not_coerse_to_nan:
 
 Null-values are no longer coerced to NaN-value in value_counts and mode
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 :meth:`Series.value_counts` and :meth:`Series.mode` no longer coerce ``None``, ``NaT`` and other null-values to a NaN-value for ``np.object``-dtype. This behavior is now consistent with ``unique``, ``isin`` and others (:issue:`42688`).