diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index b7a0cf888f1a2..db58e562b7f9c 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -102,6 +102,10 @@ Bug Fixes - Bug in ``value_counts`` when ``normalize=True`` and ``dropna=True`` where nulls still contributed to the normalized count (:issue:`12558`) +- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`) +- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=False`` where column names result in KeyError (:issue:`12642`) + + @@ -131,4 +135,3 @@ Bug Fixes -- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 06b31b5d5dc30..0d2d3a29092ba 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -175,16 +175,20 @@ def _add_margins(table, data, values, rows, cols, aggfunc, exception_msg = 'Conflicting name "{0}" in margins'.format(margins_name) for level in table.index.names: - if margins_name in table.index.get_level_values(level): - raise ValueError(exception_msg) + # an unnamed level (None) cannot be looked up by name; skip it + if level is not None: + if margins_name in table.index.get_level_values(level): + raise ValueError(exception_msg) grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name) # could be passed a Series object with no 'columns' if hasattr(table, 'columns'): for level in table.columns.names[1:]: - if margins_name in table.columns.get_level_values(level): - raise ValueError(exception_msg) + # an unnamed level (None) cannot be looked up by name; skip it + if level is not None: + if margins_name in table.columns.get_level_values(level): + raise 
ValueError(exception_msg) if len(rows) > 1: key = (margins_name,) + ('',) * (len(rows) - 1) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index ae0cd67ad77e6..62636f91bcbbc 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -936,7 +936,7 @@ def test_crosstab_no_overlap(self): tm.assert_frame_equal(actual, expected) - def test_margin_ignore_dropna_bug(self): + def test_margin_dropna(self): # GH 12577 # pivot_table counts null into margin ('All') # when margins=true and dropna=true @@ -965,6 +965,54 @@ def test_margin_ignore_dropna_bug(self): expected.columns = Index([3, 4, 'All'], name='b') tm.assert_frame_equal(actual, expected) + # GH 12642 + # _add_margins raises KeyError: Level None not found + # when margins=True and dropna=False + + df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan], + 'b': [3, 3, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3, 4, 'All']) + tm.assert_frame_equal(actual, expected) + + df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan], + 'b': [3, np.nan, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3.0, 4.0, 'All']) + tm.assert_frame_equal(actual, expected) + + a = np.array(['foo', 'foo', 'foo', 
'bar', + 'bar', 'foo', 'foo'], dtype=object) + b = np.array(['one', 'one', 'two', 'one', + 'two', np.nan, 'two'], dtype=object) + c = np.array(['dull', 'dull', 'dull', 'dull', + 'dull', 'shiny', 'shiny'], dtype=object) + + res = crosstab(a, [b, c], rownames=['a'], + colnames=['b', 'c'], margins=True, dropna=False) + m = MultiIndex.from_tuples([('one', 'dull'), ('one', 'shiny'), + ('two', 'dull'), ('two', 'shiny'), + ('All', '')]) + assert_equal(res.columns.values, m.values) + + res = crosstab([a, b], c, rownames=['a', 'b'], + colnames=['c'], margins=True, dropna=False) + m = MultiIndex.from_tuples([('bar', 'one'), ('bar', 'two'), + ('foo', 'one'), ('foo', 'two'), + ('All', '')]) + assert_equal(res.index.values, m.values) + + res = crosstab([a, b], c, rownames=['a', 'b'], + colnames=['c'], margins=True, dropna=True) + m = MultiIndex.from_tuples([('bar', 'one'), ('bar', 'two'), + ('foo', 'one'), ('foo', 'two'), + ('All', '')]) + assert_equal(res.index.values, m.values) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],