From 304c369422d8b7b8b5ed285be3fc3dd4b134799c Mon Sep 17 00:00:00 2001
From: Stefan Mejlgaard <connesy@gmail.com>
Date: Wed, 5 May 2021 15:39:30 +0000
Subject: [PATCH 1/6] ENH: Add dropna argument to DataFrame.value_counts()

---
 pandas/core/frame.py                          | 30 ++++++++++++++-
 .../tests/frame/methods/test_value_counts.py  | 38 +++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 50837e1b3ed50..126683f569340 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6380,6 +6380,7 @@ def value_counts(
         normalize: bool = False,
         sort: bool = True,
         ascending: bool = False,
+        dropna: bool = True,
     ):
         """
         Return a Series containing counts of unique rows in the DataFrame.
@@ -6396,6 +6397,8 @@ def value_counts(
             Sort by frequencies.
         ascending : bool, default False
             Sort in ascending order.
+        dropna : bool, default True
+            Don’t include counts of rows that contain NA values.
 
         Returns
         -------
@@ -6451,11 +6454,36 @@ def value_counts(
         2         2            0.25
         6         0            0.25
         dtype: float64
+
+        **dropna**
+        With `dropna` set to `False` we can also count rows with NA values.
+        >>> df = pd.DataFrame({'first_name': ['John', 'Anne', 'John', 'Beth'],
+        ...                    'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']})
+        >>> df
+          first_name middle_name
+        0       John       Smith
+        1       Anne        <NA>
+        2       John        <NA>
+        3       Beth      Louise
+
+        >>> df.value_counts()  # dropna is True per default
+        first_name  middle_name
+        Beth        Louise         1
+        John        Smith          1
+        dtype: int64
+
+        >>> df.value_counts(dropna=False)
+        first_name  middle_name
+        Anne        NaN            1
+        Beth        Louise         1
+        John        Smith          1
+                    NaN            1
+        dtype: int64
         """
         if subset is None:
             subset = self.columns.tolist()
 
-        counts = self.groupby(subset).grouper.size()
+        counts = self.groupby(subset, dropna=dropna).grouper.size()
 
         if sort:
             counts = counts.sort_values(ascending=ascending)
diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
index 23f9ebdb4479d..19715ba92266a 100644
--- a/pandas/tests/frame/methods/test_value_counts.py
+++ b/pandas/tests/frame/methods/test_value_counts.py
@@ -100,3 +100,41 @@ def test_data_frame_value_counts_empty_normalize():
     expected = pd.Series([], dtype=np.float64)
 
     tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_dropna_true():
+    df = pd.DataFrame(
+        {'first_name': ['John', 'Anne', 'John', 'Beth'],
+         'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']},
+    )
+
+    result = df.value_counts()
+    expected = pd.Series(
+        data=[1, 1],
+        index=pd.MultiIndex.from_arrays(
+            [('Beth', 'John'), ('Louise', 'Smith')], names=["first_name", "middle_name"]
+        ),
+    )
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_dropna_false():
+    df = pd.DataFrame(
+        {'first_name': ['John', 'Anne', 'John', 'Beth'],
+         'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']},
+    )
+
+    result = df.value_counts(dropna=False)
+    expected = pd.Series(
+        data=[1, 1, 1, 1],
+        index=pd.MultiIndex(
+            levels=[pd.Index(['Anne', 'Beth', 'John']),
+                    pd.Index(['Louise', 'Smith', pd.NA])],
+            codes=[[0, 1, 2, 2],
+                   [2, 0, 1, 2]],
+            names=['first_name', 'middle_name'],
+        )
+    )
+
+    tm.assert_series_equal(result, expected)

From ebaad81bc66f3c00f951d38ec69b740b17564945 Mon Sep 17 00:00:00 2001
From: Stefan Mejlgaard <connesy@gmail.com>
Date: Wed, 5 May 2021 16:10:49 +0000
Subject: [PATCH 2/6] ENH: Add whatsnew entry and versionadded note for value_counts dropna

---
 doc/source/whatsnew/v1.3.0.rst | 1 +
 pandas/core/frame.py           | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 320912ec38890..cddaaa295af01 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -224,6 +224,7 @@ Other enhancements
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
 - Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
+- Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 126683f569340..6d82d851cd536 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6400,6 +6400,8 @@ def value_counts(
         dropna : bool, default True
             Don’t include counts of rows that contain NA values.
 
+            .. versionadded:: 1.3.0
+
         Returns
         -------
         Series

From 5216d01563f963f963a469b5c1380f707b8bb287 Mon Sep 17 00:00:00 2001
From: Stefan Mejlgaard <connesy@gmail.com>
Date: Wed, 5 May 2021 21:23:45 +0200
Subject: [PATCH 3/6] ENH: Apply black formatting to tests and fix Sphinx docstring spacing

---
 pandas/core/frame.py                          |  2 ++
 .../tests/frame/methods/test_value_counts.py  | 27 +++++++++++--------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6d82d851cd536..8556dc572a1fe 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6458,7 +6458,9 @@ def value_counts(
         dtype: float64
 
         **dropna**
+
         With `dropna` set to `False` we can also count rows with NA values.
+
         >>> df = pd.DataFrame({'first_name': ['John', 'Anne', 'John', 'Beth'],
         ...                    'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']})
         >>> df
diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
index 19715ba92266a..f5e388c333886 100644
--- a/pandas/tests/frame/methods/test_value_counts.py
+++ b/pandas/tests/frame/methods/test_value_counts.py
@@ -104,15 +104,17 @@ def test_data_frame_value_counts_empty_normalize():
 
 def test_data_frame_value_counts_dropna_true():
     df = pd.DataFrame(
-        {'first_name': ['John', 'Anne', 'John', 'Beth'],
-         'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']},
+        {
+            "first_name": ["John", "Anne", "John", "Beth"],
+            "middle_name": ["Smith", pd.NA, pd.NA, "Louise"],
+        },
     )
 
     result = df.value_counts()
     expected = pd.Series(
         data=[1, 1],
         index=pd.MultiIndex.from_arrays(
-            [('Beth', 'John'), ('Louise', 'Smith')], names=["first_name", "middle_name"]
+            [("Beth", "John"), ("Louise", "Smith")], names=["first_name", "middle_name"]
         ),
     )
 
@@ -121,20 +123,23 @@ def test_data_frame_value_counts_dropna_true():
 
 def test_data_frame_value_counts_dropna_false():
     df = pd.DataFrame(
-        {'first_name': ['John', 'Anne', 'John', 'Beth'],
-         'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']},
+        {
+            "first_name": ["John", "Anne", "John", "Beth"],
+            "middle_name": ["Smith", pd.NA, pd.NA, "Louise"],
+        },
     )
 
     result = df.value_counts(dropna=False)
     expected = pd.Series(
         data=[1, 1, 1, 1],
         index=pd.MultiIndex(
-            levels=[pd.Index(['Anne', 'Beth', 'John']),
-                    pd.Index(['Louise', 'Smith', pd.NA])],
-            codes=[[0, 1, 2, 2],
-                   [2, 0, 1, 2]],
-            names=['first_name', 'middle_name'],
-        )
+            levels=[
+                pd.Index(["Anne", "Beth", "John"]),
+                pd.Index(["Louise", "Smith", pd.NA]),
+            ],
+            codes=[[0, 1, 2, 2], [2, 0, 1, 2]],
+            names=["first_name", "middle_name"],
+        ),
     )
 
     tm.assert_series_equal(result, expected)

From b16fb3f705cac52dfe32640633a6959d9046ccfd Mon Sep 17 00:00:00 2001
From: Stefan Mejlgaard <connesy@gmail.com>
Date: Thu, 6 May 2021 09:05:01 +0200
Subject: [PATCH 4/6] ENH: Add GitHub reference to value_counts dropna tests

---
 pandas/tests/frame/methods/test_value_counts.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
index f5e388c333886..0cd269c9b0517 100644
--- a/pandas/tests/frame/methods/test_value_counts.py
+++ b/pandas/tests/frame/methods/test_value_counts.py
@@ -103,6 +103,7 @@ def test_data_frame_value_counts_empty_normalize():
 
 
 def test_data_frame_value_counts_dropna_true():
+    # GH 41334
     df = pd.DataFrame(
         {
             "first_name": ["John", "Anne", "John", "Beth"],
@@ -122,6 +123,7 @@ def test_data_frame_value_counts_dropna_true():
 
 
 def test_data_frame_value_counts_dropna_false():
+    # GH 41334
     df = pd.DataFrame(
         {
             "first_name": ["John", "Anne", "John", "Beth"],

From aa94b3b1ca1a533c015359492720120a35f3fc24 Mon Sep 17 00:00:00 2001
From: Stefan Mejlgaard <connesy@gmail.com>
Date: Thu, 6 May 2021 10:43:14 +0200
Subject: [PATCH 5/6] ENH: Parametrize dropna tests over all null values (nulls_fixture)

---
 pandas/tests/frame/methods/test_value_counts.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
index 0cd269c9b0517..6e8528845ea6b 100644
--- a/pandas/tests/frame/methods/test_value_counts.py
+++ b/pandas/tests/frame/methods/test_value_counts.py
@@ -102,15 +102,14 @@ def test_data_frame_value_counts_empty_normalize():
     tm.assert_series_equal(result, expected)
 
 
-def test_data_frame_value_counts_dropna_true():
+def test_data_frame_value_counts_dropna_true(nulls_fixture):
     # GH 41334
     df = pd.DataFrame(
         {
             "first_name": ["John", "Anne", "John", "Beth"],
-            "middle_name": ["Smith", pd.NA, pd.NA, "Louise"],
+            "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"],
         },
     )
-
     result = df.value_counts()
     expected = pd.Series(
         data=[1, 1],
@@ -122,12 +121,12 @@ def test_data_frame_value_counts_dropna_true():
     tm.assert_series_equal(result, expected)
 
 
-def test_data_frame_value_counts_dropna_false():
+def test_data_frame_value_counts_dropna_false(nulls_fixture):
     # GH 41334
     df = pd.DataFrame(
         {
             "first_name": ["John", "Anne", "John", "Beth"],
-            "middle_name": ["Smith", pd.NA, pd.NA, "Louise"],
+            "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"],
         },
     )
 
@@ -137,7 +136,7 @@ def test_data_frame_value_counts_dropna_false():
         index=pd.MultiIndex(
             levels=[
                 pd.Index(["Anne", "Beth", "John"]),
-                pd.Index(["Louise", "Smith", pd.NA]),
+                pd.Index(["Louise", "Smith", nulls_fixture]),
             ],
             codes=[[0, 1, 2, 2], [2, 0, 1, 2]],
             names=["first_name", "middle_name"],

From f798c5b888d1bb28e424d5e39d1510b4e9782391 Mon Sep 17 00:00:00 2001
From: Stefan Mejlgaard <connesy@gmail.com>
Date: Fri, 7 May 2021 09:08:37 +0200
Subject: [PATCH 6/6] ENH: Remove unnecessary comment and label

---
 pandas/core/frame.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8556dc572a1fe..f3902b0a9d288 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6457,8 +6457,6 @@ def value_counts(
         6         0            0.25
         dtype: float64
 
-        **dropna**
-
         With `dropna` set to `False` we can also count rows with NA values.
 
         >>> df = pd.DataFrame({'first_name': ['John', 'Anne', 'John', 'Beth'],
@@ -6470,7 +6468,7 @@ def value_counts(
         2       John        <NA>
         3       Beth      Louise
 
-        >>> df.value_counts()  # dropna is True per default
+        >>> df.value_counts()
         first_name  middle_name
         Beth        Louise         1
         John        Smith          1