Commit 4f3472d (1 parent: dcd9df7)

BUG: fix groupby bug/segfault with NAs in hierarchical level. fix MultiIndex tuple with NA issue. close #2616

5 files changed: 43 additions & 1 deletion

RELEASE.rst

Lines changed: 4 additions & 0 deletions

@@ -65,6 +65,9 @@ pandas 0.10.1
   - More robust detection of being in IPython session for wide DataFrame
     console formatting (GH2585_)
   - Fix platform issues with ``file:///`` in unit test (#2564)
+  - Fix bug and possible segfault when grouping by hierarchical level that
+    contains NA values (GH2616_)
+  - Ensure that MultiIndex tuples can be constructed with NAs (seen in #2616)

 **API Changes**

@@ -78,6 +81,7 @@ pandas 0.10.1
 .. _GH2576: https://github.com/pydata/pandas/issues/2576
 .. _GH2585: https://github.com/pydata/pandas/issues/2585
 .. _GH2604: https://github.com/pydata/pandas/issues/2604
+.. _GH2616: https://github.com/pydata/pandas/issues/2616

 pandas 0.10.0
 =============

pandas/core/groupby.py

Lines changed: 9 additions & 0 deletions

@@ -1109,6 +1109,15 @@ def __init__(self, index, grouper=None, name=None, level=None,
                 # all levels may not be observed
                 labels, uniques = algos.factorize(inds, sort=True)

+                if len(uniques) > 0 and uniques[0] == -1:
+                    # handle NAs
+                    mask = inds != -1
+                    ok_labels, uniques = algos.factorize(inds[mask], sort=True)
+
+                    labels = np.empty(len(inds), dtype=inds.dtype)
+                    labels[mask] = ok_labels
+                    labels[-mask] = -1
+
                 if len(uniques) < len(level_index):
                     level_index = level_index.take(uniques)
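The new branch works because factorize(..., sort=True) sorts the uniques, so the -1 NA sentinel used in MultiIndex label arrays, if present, lands first; the patch then re-factorizes only the valid codes and writes the sentinel back into the masked slots. A minimal standalone sketch of the same idea (an assumption: it uses the public pd.factorize in place of the internal algos.factorize, and the modern ~mask spelling where the 2013-era NumPy also accepted -mask on boolean arrays):

    import numpy as np
    import pandas as pd

    # MultiIndex label arrays use -1 as the NA sentinel; factorizing them
    # directly would turn the sentinel into an ordinary group key.
    inds = np.array([1, 1, -1, 0, 0])

    _, uniques = pd.factorize(inds, sort=True)
    assert uniques[0] == -1            # sort=True puts the sentinel first

    mask = inds != -1                  # positions holding a real level code
    ok_labels, uniques = pd.factorize(inds[mask], sort=True)

    labels = np.empty(len(inds), dtype=inds.dtype)
    labels[mask] = ok_labels           # compact codes for the observed values
    labels[~mask] = -1                 # NA positions keep the sentinel
    print(labels)                      # [ 1  1 -1  0  0]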

pandas/core/index.py

Lines changed: 1 addition & 1 deletion

@@ -1467,7 +1467,7 @@ def values(self):

         values = []
         for lev, lab in zip(self.levels, self.labels):
-            taken = ndtake(lev.values, lab)
+            taken = com.take_1d(lev.values, lab)
             # Need to box timestamps, etc.
             if hasattr(lev, '_box_values'):
                 taken = lev._box_values(taken)

pandas/tests/test_groupby.py

Lines changed: 21 additions & 0 deletions

@@ -1157,6 +1157,27 @@ def test_groupby_level(self):
         # raise exception for non-MultiIndex
         self.assertRaises(ValueError, self.df.groupby, level=1)

+    def test_groupby_level_with_nas(self):
+        index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
+                           labels=[[1, 1, 1, 1, 0, 0, 0, 0],
+                                   [0, 1, 2, 3, 0, 1, 2, 3]])
+
+        # factorizing doesn't confuse things
+        s = Series(np.arange(8.), index=index)
+        result = s.groupby(level=0).sum()
+        expected = Series([22., 6.], index=[1, 0])
+        assert_series_equal(result, expected)
+
+        index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
+                           labels=[[1, 1, 1, 1, -1, 0, 0, 0],
+                                   [0, 1, 2, 3, 0, 1, 2, 3]])
+
+        # factorizing doesn't confuse things
+        s = Series(np.arange(8.), index=index)
+        result = s.groupby(level=0).sum()
+        expected = Series([18., 6.], index=[1, 0])
+        assert_series_equal(result, expected)
+
     def test_groupby_level_apply(self):
         frame = self.mframe
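The second case is the regression test for the reported segfault: one level-0 code is the -1 NA sentinel, and the expectation is that the corresponding row simply drops out of the aggregation. Reproduced end to end on a current pandas (a sketch: the modern constructor takes codes= where 0.10 took labels=, and the default sort=True orders the group keys):

    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
                        codes=[[1, 1, 1, 1, -1, 0, 0, 0],
                               [0, 1, 2, 3, 0, 1, 2, 3]])
    s = pd.Series(np.arange(8.), index=idx)

    # The row whose level-0 code is -1 (value 4.0) is excluded from the
    # grouping instead of crashing it: group 0 sums to 6.0, group 1 to 18.0.
    print(s.groupby(level=0).sum())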

pandas/tests/test_multilevel.py

Lines changed: 8 additions & 0 deletions

@@ -1684,6 +1684,14 @@ def test_assign_index_sequences(self):
         df.index = l
         repr(df)

+    def test_tuples_have_na(self):
+        index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
+                           labels=[[1, 1, 1, 1, -1, 0, 0, 0],
+                                   [0, 1, 2, 3, 0, 1, 2, 3]])
+
+        self.assertTrue(isnull(index[4][0]))
+        self.assertTrue(isnull(index.values[4][0]))
+

 if __name__ == '__main__':
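The companion fix in pandas/core/index.py is what makes these assertions pass: materializing a tuple for a position whose code is -1 now boxes the missing slot as NaN rather than wrapping around to the last level value. Checked interactively on a current pandas (sketch; codes= replaces the 0.10-era labels= keyword):

    import pandas as pd

    idx = pd.MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
                        codes=[[1, 1, 1, 1, -1, 0, 0, 0],
                               [0, 1, 2, 3, 0, 1, 2, 3]])

    print(idx[4])                  # (nan, 0): the -1 code boxes to NaN
    print(pd.isnull(idx[4][0]))    # True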
