pandas-dev · phofl · May 14, 2023 · May 12, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -420,6 +420,7 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
+- Bug in :func:`crosstab` when ``dropna=False`` would not keep ``np.nan`` in the result (:issue:`10772`)
 - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`)
 - Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
 - Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -164,7 +164,7 @@ def __internal_pivot_table(
                 pass
         values = list(values)
 
-    grouped = data.groupby(keys, observed=observed, sort=sort)
+    grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
     agged = grouped.agg(aggfunc)
 
     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):

diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py
@@ -286,24 +286,29 @@ def test_margin_dropna4(self):
         # GH 12642
         # _add_margins raises KeyError: Level None not found
         # when margins=True and dropna=False
+        # GH: 10772: Keep np.nan in result with dropna=False
         df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
         actual = crosstab(df.a, df.b, margins=True, dropna=False)
-        expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
-        expected.index = Index([1.0, 2.0, "All"], name="a")
+        expected = DataFrame([[1, 0, 1.0], [1, 3, 4.0], [0, 1, np.nan], [2, 4, 6.0]])
+        expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
         expected.columns = Index([3, 4, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
 
     def test_margin_dropna5(self):
+        # GH: 10772: Keep np.nan in result with dropna=False
         df = DataFrame(
             {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
         )
         actual = crosstab(df.a, df.b, margins=True, dropna=False)
-        expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
-        expected.index = Index([1.0, 2.0, "All"], name="a")
-        expected.columns = Index([3.0, 4.0, "All"], name="b")
+        expected = DataFrame(
+            [[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, np.nan], [1, 4, 0, 6.0]]
+        )
+        expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
+        expected.columns = Index([3.0, 4.0, np.nan, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
 
     def test_margin_dropna6(self):
+        # GH: 10772: Keep np.nan in result with dropna=False
         a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
         b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)
         c = np.array(
@@ -315,13 +320,14 @@ def test_margin_dropna6(self):
         )
         m = MultiIndex.from_arrays(
             [
-                ["one", "one", "two", "two", "All"],
-                ["dull", "shiny", "dull", "shiny", ""],
+                ["one", "one", "two", "two", np.nan, np.nan, "All"],
+                ["dull", "shiny", "dull", "shiny", "dull", "shiny", ""],
             ],
             names=["b", "c"],
         )
         expected = DataFrame(
-            [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m
+            [[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 0, 7]],
+            columns=m,
         )
         expected.index = Index(["bar", "foo", "All"], name="a")
         tm.assert_frame_equal(actual, expected)
@@ -330,11 +336,23 @@ def test_margin_dropna6(self):
             [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False
         )
         m = MultiIndex.from_arrays(
-            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
+            [
+                ["bar", "bar", "bar", "foo", "foo", "foo", "All"],
+                ["one", "two", np.nan, "one", "two", np.nan, ""],
+            ],
             names=["a", "b"],
         )
         expected = DataFrame(
-            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m
+            [
+                [1, 0, 1.0],
+                [1, 0, 1.0],
+                [0, 0, np.nan],
+                [2, 0, 2.0],
+                [1, 1, 2.0],
+                [0, 1, np.nan],
+                [5, 2, 7.0],
+            ],
+            index=m,
         )
         expected.columns = Index(["dull", "shiny", "All"], name="c")
         tm.assert_frame_equal(actual, expected)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -248,11 +248,18 @@ def test_pivot_with_non_observable_dropna(self, dropna):
         )
 
         result = df.pivot_table(index="A", values="B", dropna=dropna)
+        if dropna:
+            values = [2.0, 3.0]
+            codes = [0, 1]
+        else:
+            # GH: 10772
+            values = [2.0, 3.0, 0.0]
+            codes = [0, 1, -1]
         expected = DataFrame(
-            {"B": [2.0, 3.0]},
+            {"B": values},
             index=Index(
                 Categorical.from_codes(
-                    [0, 1], categories=["low", "high"], ordered=True
+                    codes, categories=["low", "high"], ordered=dropna
                 ),
                 name="A",
             ),