pandas-dev · iabhi4 · May 31, 2025 · May 31, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -844,6 +844,7 @@ Reshaping
 - Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`)
 - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
 - Bug in :meth:`DataFrame.pivot_table` incorrectly ignoring the ``values`` argument when also supplied to the ``index`` or ``columns`` parameters (:issue:`57876`, :issue:`61292`)
+- Bug in :meth:`DataFrame.pivot_table` where ``margins=True`` did not correctly include groups with ``NaN`` values in the index or columns when ``dropna=False`` was passed (:issue:`61509`)
 - Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`)
 - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -396,6 +396,7 @@ def __internal_pivot_table(
             observed=dropna,
             margins_name=margins_name,
             fill_value=fill_value,
+            dropna=dropna,
         )
 
     # discard the top level
@@ -422,6 +423,7 @@ def _add_margins(
     observed: bool,
     margins_name: Hashable = "All",
     fill_value=None,
+    dropna: bool = True,
 ):
     if not isinstance(margins_name, str):
         raise ValueError("margins_name argument must be a string")
@@ -461,6 +463,7 @@ def _add_margins(
             kwargs,
             observed,
             margins_name,
+            dropna,
         )
         if not isinstance(marginal_result_set, tuple):
             return marginal_result_set
@@ -469,7 +472,7 @@ def _add_margins(
         # no values, and table is a DataFrame
         assert isinstance(table, ABCDataFrame)
         marginal_result_set = _generate_marginal_results_without_values(
-            table, data, rows, cols, aggfunc, kwargs, observed, margins_name
+            table, data, rows, cols, aggfunc, kwargs, observed, margins_name, dropna
         )
         if not isinstance(marginal_result_set, tuple):
             return marginal_result_set
@@ -538,6 +541,7 @@ def _generate_marginal_results(
     kwargs,
     observed: bool,
     margins_name: Hashable = "All",
+    dropna: bool = True,
 ):
     margin_keys: list | Index
     if len(cols) > 0:
@@ -551,7 +555,7 @@ def _all_key(key):
         if len(rows) > 0:
             margin = (
                 data[rows + values]
-                .groupby(rows, observed=observed)
+                .groupby(rows, observed=observed, dropna=dropna)
                 .agg(aggfunc, **kwargs)
             )
             cat_axis = 1
@@ -567,7 +571,7 @@ def _all_key(key):
         else:
             margin = (
                 data[cols[:1] + values]
-                .groupby(cols[:1], observed=observed)
+                .groupby(cols[:1], observed=observed, dropna=dropna)
                 .agg(aggfunc, **kwargs)
                 .T
             )
@@ -610,7 +614,9 @@ def _all_key(key):
 
     if len(cols) > 0:
         row_margin = (
-            data[cols + values].groupby(cols, observed=observed).agg(aggfunc, **kwargs)
+            data[cols + values]
+            .groupby(cols, observed=observed, dropna=dropna)
+            .agg(aggfunc, **kwargs)
         )
         row_margin = row_margin.stack()
 
@@ -633,6 +639,7 @@ def _generate_marginal_results_without_values(
     kwargs,
     observed: bool,
     margins_name: Hashable = "All",
+    dropna: bool = True,
 ):
     margin_keys: list | Index
     if len(cols) > 0:
@@ -645,7 +652,7 @@ def _all_key():
             return (margins_name,) + ("",) * (len(cols) - 1)
 
         if len(rows) > 0:
-            margin = data.groupby(rows, observed=observed)[rows].apply(
+            margin = data.groupby(rows, observed=observed, dropna=dropna)[rows].apply(
                 aggfunc, **kwargs
             )
             all_key = _all_key()
@@ -654,7 +661,9 @@ def _all_key():
             margin_keys.append(all_key)
 
         else:
-            margin = data.groupby(level=0, observed=observed).apply(aggfunc, **kwargs)
+            margin = data.groupby(level=0, observed=observed, dropna=dropna).apply(
+                aggfunc, **kwargs
+            )
             all_key = _all_key()
             table[all_key] = margin
             result = table
@@ -665,7 +674,7 @@ def _all_key():
         margin_keys = table.columns
 
     if len(cols):
-        row_margin = data.groupby(cols, observed=observed)[cols].apply(
+        row_margin = data.groupby(cols, observed=observed, dropna=dropna)[cols].apply(
             aggfunc, **kwargs
         )
     else:

diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py
@@ -289,7 +289,7 @@ def test_margin_dropna4(self):
         # GH: 10772: Keep np.nan in result with dropna=False
         df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
         actual = crosstab(df.a, df.b, margins=True, dropna=False)
-        expected = DataFrame([[1, 0, 1.0], [1, 3, 4.0], [0, 1, np.nan], [2, 4, 6.0]])
+        expected = DataFrame([[1, 0, 1], [1, 3, 4], [0, 1, 1], [2, 4, 6]])
         expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
         expected.columns = Index([3, 4, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
@@ -301,11 +301,11 @@ def test_margin_dropna5(self):
         )
         actual = crosstab(df.a, df.b, margins=True, dropna=False)
         expected = DataFrame(
-            [[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, np.nan], [1, 4, 0, 6.0]]
+            [[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, 4.0], [1, 4, 1, 6.0]]
         )
         expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
         expected.columns = Index([3.0, 4.0, np.nan, "All"], name="b")
-        tm.assert_frame_equal(actual, expected)
+        tm.assert_frame_equal(actual, expected, check_dtype=False)
 
     def test_margin_dropna6(self):
         # GH: 10772: Keep np.nan in result with dropna=False
@@ -326,7 +326,7 @@ def test_margin_dropna6(self):
             names=["b", "c"],
         )
         expected = DataFrame(
-            [[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 0, 7]],
+            [[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 1, 7]],
             columns=m,
         )
         expected.index = Index(["bar", "foo", "All"], name="a")
@@ -344,13 +344,13 @@ def test_margin_dropna6(self):
         )
         expected = DataFrame(
             [
-                [1, 0, 1.0],
-                [1, 0, 1.0],
+                [1, 0, 1],
+                [1, 0, 1],
                 [0, 0, np.nan],
-                [2, 0, 2.0],
-                [1, 1, 2.0],
-                [0, 1, np.nan],
-                [5, 2, 7.0],
+                [2, 0, 2],
+                [1, 1, 2],
+                [0, 1, 1],
+                [5, 2, 7],
             ],
             index=m,
         )

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -2594,6 +2594,36 @@ def test_pivot_table_values_as_two_params(
         expected = DataFrame(data=e_data, index=e_index, columns=e_cols)
         tm.assert_frame_equal(result, expected)
 
+    def test_pivot_table_margins_include_nan_groups(self):
+        # GH#61509: margins must keep NaN groups when dropna=False
+        df = DataFrame(
+            {
+                "i": [1, 2, 3],
+                "g1": ["a", "b", "b"],
+                "g2": ["x", None, None],
+            }
+        )
+
+        result = df.pivot_table(
+            index="g1",
+            columns="g2",
+            values="i",
+            aggfunc="count",
+            dropna=False,
+            margins=True,
+        )
+
+        expected = DataFrame(
+            {
+                "x": {"a": 1.0, "b": np.nan, "All": 1.0},
+                np.nan: {"a": np.nan, "b": 2.0, "All": 2.0},
+                "All": {"a": 1.0, "b": 2.0, "All": 3.0},
+            }
+        )
+        expected.index.name = "g1"
+        expected.columns.name = "g2"
+        tm.assert_frame_equal(result, expected, check_dtype=False)
+
 
 class TestPivot:
     def test_pivot(self):