pandas-dev · OXPHOS · Mar 14, 2016 · Mar 14, 2016 · Mar 14, 2016 · Mar 15, 2016
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -89,3 +89,5 @@ Bug Fixes
 ~~~~~~~~~
 
 - Bug in ``value_counts`` when ``normalize=True`` and ``dropna=True`` where nulls still contributed to the normalized count (:issue:`12558`)
+- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to the margin count (:issue:`12577`)
+- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=False`` where unnamed index or column levels raised a ``KeyError`` (:issue:`12642`)
diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py
@@ -149,6 +149,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
         table = table.fillna(value=fill_value, downcast='infer')
 
     if margins:
+        if dropna:
+            data = data[data.notnull().all(axis=1)]
         table = _add_margins(table, data, values, rows=index,
                              cols=columns, aggfunc=aggfunc,
                              margins_name=margins_name)
@@ -173,16 +175,18 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
 
     exception_msg = 'Conflicting name "{0}" in margins'.format(margins_name)
     for level in table.index.names:
-        if margins_name in table.index.get_level_values(level):
-            raise ValueError(exception_msg)
+        if level is not None:
+            if margins_name in table.index.get_level_values(level):
+                raise ValueError(exception_msg)
 
     grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name)
 
     # could be passed a Series object with no 'columns'
     if hasattr(table, 'columns'):
         for level in table.columns.names[1:]:
-            if margins_name in table.columns.get_level_values(level):
-                raise ValueError(exception_msg)
+            if level is not None:
+                if margins_name in table.columns.get_level_values(level):
+                    raise ValueError(exception_msg)
 
     if len(rows) > 1:
         key = (margins_name,) + ('',) * (len(rows) - 1)

diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py
@@ -936,6 +936,84 @@ def test_crosstab_no_overlap(self):
 
         tm.assert_frame_equal(actual, expected)
 
+    def test_margin_ignore_dropna_bug(self):
+        # GH 12577
+        # pivot_table used to count rows containing nulls in the margin
+        # ('All') when margins=True and dropna=True
+
+        df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan],
+                           'b': [3, 3, 4, 4, 4, 4]})
+        actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
+        expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]])
+        expected.index = Index([1.0, 2.0, 'All'], name='a')
+        expected.columns = Index([3, 4, 'All'], name='b')
+        tm.assert_frame_equal(actual, expected)
+
+        df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
+                        'b': [3, np.nan, 4, 4, 4, 4]})
+        actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
+        expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
+        expected.index = Index([1.0, 2.0, 'All'], name='a')
+        expected.columns = Index([3.0, 4.0, 'All'], name='b')
+        tm.assert_frame_equal(actual, expected)
+
+        df = DataFrame({'a': [1, np.nan, np.nan, np.nan, np.nan, 2],
+                        'b': [3, 3, 4, 4, 4, 4]})
+        actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
+        expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
+        expected.index = Index([1.0, 2.0, 'All'], name='a')
+        expected.columns = Index([3, 4, 'All'], name='b')
+        tm.assert_frame_equal(actual, expected)
+
+    def test_marginsTrue_dropnaFalse_KeyError_bug(self):
+        # GH 12642
+        # _add_margins raised "KeyError: Level None not found"
+        # when margins=True and dropna=False
+
+        df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan],
+                           'b': [3, 3, 4, 4, 4, 4]})
+        actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
+        expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
+        expected.index = Index([1.0, 2.0, 'All'], name='a')
+        expected.columns = Index([3, 4, 'All'])
+        tm.assert_frame_equal(actual, expected)
+
+        df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
+                        'b': [3, np.nan, 4, 4, 4, 4]})
+        actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
+        expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
+        expected.index = Index([1.0, 2.0, 'All'], name='a')
+        expected.columns = Index([3.0, 4.0, 'All'])
+        tm.assert_frame_equal(actual, expected)
+
+        a = np.array(['foo', 'foo', 'foo', 'bar',
+                      'bar', 'foo', 'foo'], dtype=object)
+        b = np.array(['one', 'one', 'two', 'one',
+                      'two', np.nan, 'two'], dtype=object)
+        c = np.array(['dull', 'dull', 'dull', 'dull',
+                      'dull', 'shiny', 'shiny'], dtype=object)
+
+        res = crosstab(a, [b, c], rownames=['a'],
+                       colnames=['b', 'c'], margins=True, dropna=False)
+        m = MultiIndex.from_tuples([('one', 'dull'), ('one', 'shiny'),
+                                    ('two', 'dull'), ('two', 'shiny'),
+                                    ('All', '')])
+        assert_equal(res.columns.values, m.values)
+
+        res = crosstab([a, b], c, rownames=['a', 'b'],
+                       colnames=['c'], margins=True, dropna=False)
+        m = MultiIndex.from_tuples([('bar', 'one'), ('bar', 'two'),
+                                    ('foo', 'one'), ('foo', 'two'),
+                                    ('All', '')])
+        assert_equal(res.index.values, m.values)
+
+        res = crosstab([a, b], c, rownames=['a', 'b'],
+                       colnames=['c'], margins=True, dropna=True)
+        m = MultiIndex.from_tuples([('bar', 'one'), ('bar', 'two'),
+                                    ('foo', 'one'), ('foo', 'two'),
+                                    ('All', '')])
+        assert_equal(res.index.values, m.values)
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],