Skip to content

Commit f7faee0

Browse files
OXPHOS authored and
jreback committed
BUG: Crosstab with margins=True ignoring dropna=True
closes #12577 closes #12614
1 parent f71537a commit f7faee0

File tree

3 files changed

+68
-0
lines changed

3 files changed

+68
-0
lines changed

doc/source/whatsnew/v0.18.1.txt

+37
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,43 @@ Bug Fixes
9090

9191
- Bug in ``Period`` and ``PeriodIndex`` creation raises ``KeyError`` if ``freq="Minute"`` is specified. Note that "Minute" freq is deprecated in v0.17.0, and recommended to use ``freq="T"`` instead (:issue:`11854`)
9292

93+
94+
95+
96+
97+
98+
99+
100+
101+
93102
- Bug in ``value_counts`` when ``normalize=True`` and ``dropna=True`` where nulls still contributed to the normalized count (:issue:`12558`)
94103

104+
105+
106+
107+
108+
109+
110+
111+
112+
113+
114+
95115
- Bug in ``CategoricalIndex.get_loc`` returns different result from regular ``Index`` (:issue:`12531`)
116+
117+
118+
119+
120+
121+
122+
123+
124+
125+
126+
127+
128+
129+
130+
131+
132+
- Bug in ``pivot_table`` (and therefore ``crosstab``) when ``margins=True`` and ``dropna=True`` where rows containing nulls still contributed to the margin counts (:issue:`12577`)

pandas/tools/pivot.py

+2
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
149149
table = table.fillna(value=fill_value, downcast='infer')
150150

151151
if margins:
152+
if dropna:
153+
data = data[data.notnull().all(axis=1)]
152154
table = _add_margins(table, data, values, rows=index,
153155
cols=columns, aggfunc=aggfunc,
154156
margins_name=margins_name)

pandas/tools/tests/test_pivot.py

+29
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,35 @@ def test_crosstab_no_overlap(self):
936936

937937
tm.assert_frame_equal(actual, expected)
938938

939+
def test_margin_ignore_dropna_bug(self):
940+
# GH 12577
941+
# Regression test: pivot_table used to count rows containing nulls in the
942+
# 'All' margin even when margins=True and dropna=True.
943+
944+
# Case 1: a single NaN in ``a``. Six input rows, but the expected grand
# total is 5, i.e. the NaN row is not counted in the margins.
df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan],
945+
'b': [3, 3, 4, 4, 4, 4]})
946+
actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
947+
expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]])
948+
expected.index = Index([1.0, 2.0, 'All'], name='a')
949+
expected.columns = Index([3, 4, 'All'], name='b')
950+
tm.assert_frame_equal(actual, expected)
951+
952+
# Case 2: NaNs in both ``a`` and ``b``. Only the two rows with no nulls
# are expected in the table; the ``b`` labels are expected as floats.
df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
953+
'b': [3, np.nan, 4, 4, 4, 4]})
954+
actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
955+
expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
956+
expected.index = Index([1.0, 2.0, 'All'], name='a')
957+
expected.columns = Index([3.0, 4.0, 'All'], name='b')
958+
tm.assert_frame_equal(actual, expected)
959+
960+
# Case 3: NaNs only in ``a``. Again two rows are expected to remain, and
# the expected grand total is 2.
df = DataFrame({'a': [1, np.nan, np.nan, np.nan, np.nan, 2],
961+
'b': [3, 3, 4, 4, 4, 4]})
962+
actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
963+
expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
964+
expected.index = Index([1.0, 2.0, 'All'], name='a')
965+
expected.columns = Index([3, 4, 'All'], name='b')
966+
tm.assert_frame_equal(actual, expected)
967+
939968
if __name__ == '__main__':
940969
import nose
941970
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)