Skip to content

Commit c8f0d3a

Browse files
committed
fix margin/dropna issue
1 parent 504ad46 commit c8f0d3a

File tree

3 files changed

+65
-5
lines changed

3 files changed

+65
-5
lines changed

doc/source/whatsnew/v0.18.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,8 @@ Bug Fixes
272272

273273

274274
- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)
275+
- Bug in ``pivot_table`` when ``dropna=False`` where table index/column names disappeared (:issue:`12133`)
276+
- Bug in ``crosstab`` when ``margins=True`` and ``dropna=False`` which raised ``KeyError: Level None not found`` (:issue:`12642`)
275277

276278
- Bug in ``Series.name`` when ``name`` attribute can be a hashable type (:issue:`12610`)
277279
- Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)

pandas/tools/pivot.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -128,13 +128,15 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
128128

129129
if not dropna:
130130
try:
131-
m = MultiIndex.from_arrays(cartesian_product(table.index.levels))
131+
m = MultiIndex.from_arrays(cartesian_product(table.index.levels),
132+
names=table.index.names)
132133
table = table.reindex_axis(m, axis=0)
133134
except AttributeError:
134135
pass # it's a single level
135136

136137
try:
137-
m = MultiIndex.from_arrays(cartesian_product(table.columns.levels))
138+
m = MultiIndex.from_arrays(cartesian_product(table.columns.levels),
139+
names=table.columns.names)
138140
table = table.reindex_axis(m, axis=1)
139141
except AttributeError:
140142
pass # it's a single level or a series

pandas/tools/tests/test_pivot.py

+59-3
Original file line numberDiff line numberDiff line change
@@ -899,8 +899,8 @@ def test_crosstab_dropna(self):
899899
'two', 'two', 'two'], dtype=object)
900900
c = np.array(['dull', 'dull', 'dull', 'dull',
901901
'dull', 'shiny', 'shiny'], dtype=object)
902-
res = crosstab(a, [b, c], rownames=['a'],
903-
colnames=['b', 'c'], dropna=False)
902+
res = pd.crosstab(a, [b, c], rownames=['a'],
903+
colnames=['b', 'c'], dropna=False)
904904
m = MultiIndex.from_tuples([('one', 'dull'), ('one', 'shiny'),
905905
('two', 'dull'), ('two', 'shiny')])
906906
assert_equal(res.columns.values, m.values)
@@ -936,7 +936,7 @@ def test_crosstab_no_overlap(self):
936936

937937
tm.assert_frame_equal(actual, expected)
938938

939-
def test_margin_ignore_dropna_bug(self):
939+
def test_margin_dropna(self):
940940
# GH 12577
941941
# pivot_table counts null into margin ('All')
942942
# when margins=true and dropna=true
@@ -965,6 +965,62 @@ def test_margin_ignore_dropna_bug(self):
965965
expected.columns = Index([3, 4, 'All'], name='b')
966966
tm.assert_frame_equal(actual, expected)
967967

968+
# GH 12642
969+
# _add_margins raises KeyError: Level None not found
970+
# when margins=True and dropna=False
971+
df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan],
972+
'b': [3, 3, 4, 4, 4, 4]})
973+
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
974+
expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
975+
expected.index = Index([1.0, 2.0, 'All'], name='a')
976+
expected.columns = Index([3, 4, 'All'], name='b')
977+
tm.assert_frame_equal(actual, expected)
978+
979+
df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
980+
'b': [3, np.nan, 4, 4, 4, 4]})
981+
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
982+
expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
983+
expected.index = Index([1.0, 2.0, 'All'], name='a')
984+
expected.columns = Index([3.0, 4.0, 'All'], name='b')
985+
tm.assert_frame_equal(actual, expected)
986+
987+
a = np.array(['foo', 'foo', 'foo', 'bar',
988+
'bar', 'foo', 'foo'], dtype=object)
989+
b = np.array(['one', 'one', 'two', 'one',
990+
'two', np.nan, 'two'], dtype=object)
991+
c = np.array(['dull', 'dull', 'dull', 'dull',
992+
'dull', 'shiny', 'shiny'], dtype=object)
993+
994+
actual = pd.crosstab(a, [b, c], rownames=['a'],
995+
colnames=['b', 'c'], margins=True, dropna=False)
996+
m = MultiIndex.from_arrays([['one', 'one', 'two', 'two', 'All'],
997+
['dull', 'shiny', 'dull', 'shiny', '']],
998+
names=['b', 'c'])
999+
expected = DataFrame([[1, 0, 1, 0, 2], [2, 0, 1, 1, 5],
1000+
[3, 0, 2, 1, 7]], columns=m)
1001+
expected.index = Index(['bar', 'foo', 'All'], name='a')
1002+
tm.assert_frame_equal(actual, expected)
1003+
1004+
actual = pd.crosstab([a, b], c, rownames=['a', 'b'],
1005+
colnames=['c'], margins=True, dropna=False)
1006+
m = MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo', 'All'],
1007+
['one', 'two', 'one', 'two', '']],
1008+
names=['a', 'b'])
1009+
expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2],
1010+
[5, 2, 7]], index=m)
1011+
expected.columns = Index(['dull', 'shiny', 'All'], name='c')
1012+
tm.assert_frame_equal(actual, expected)
1013+
1014+
actual = pd.crosstab([a, b], c, rownames=['a', 'b'],
1015+
colnames=['c'], margins=True, dropna=True)
1016+
m = MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo', 'All'],
1017+
['one', 'two', 'one', 'two', '']],
1018+
names=['a', 'b'])
1019+
expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2],
1020+
[5, 1, 6]], index=m)
1021+
expected.columns = Index(['dull', 'shiny', 'All'], name='c')
1022+
tm.assert_frame_equal(actual, expected)
1023+
9681024
if __name__ == '__main__':
9691025
import nose
9701026
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)