Skip to content

Commit a733583

Browse files
committed
BUG: preserve index names in GroupBy.apply concat step, GH #481
1 parent 21bad0f commit a733583

File tree

3 files changed: 66 additions (+66) and 17 deletions (-17).

RELEASE.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ pandas 0.6.1
112112
- Handle differently-indexed Series input to DataFrame constructor (GH #475)
113113
- Omit nuisance columns in multi-groupby with Python function
114114
- Buglet in handling of single grouping in general apply
115+
- Handle type inference properly when passing list of lists or tuples to
116+
DataFrame constructor (GH #484)
117+
- Preserve Index / MultiIndex names in GroupBy.apply concatenation step (GH
118+
#481)
115119

116120
Thanks
117121
------

pandas/core/groupby.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,8 @@ def _concat_frames_hierarchical(frames, keys, groupings, axis=0):
11881188
return DataFrame(new_values, index=new_index, columns=new_columns)
11891189

11901190
def _make_concat_multiindex(indexes, keys, groupings):
1191+
names = [ping.name for ping in groupings]
1192+
11911193
if not _all_indexes_same(indexes):
11921194
label_list = []
11931195

@@ -1213,13 +1215,22 @@ def _make_concat_multiindex(indexes, keys, groupings):
12131215
else:
12141216
label_list.append(concat_index.values)
12151217

1216-
return MultiIndex.from_arrays(label_list)
1218+
consensus_name = indexes[0].names
1219+
for index in indexes[1:]:
1220+
if index.names != consensus_name:
1221+
consensus_name = [None] * index.nlevels
1222+
break
1223+
names.extend(consensus_name)
1224+
1225+
return MultiIndex.from_arrays(label_list, names=names)
12171226

12181227
new_index = indexes[0]
12191228
n = len(new_index)
12201229

1230+
names.append(indexes[0].name)
1231+
12211232
# do something a bit more speedy
1222-
levels = [ping.group_index for ping in groupings]
1233+
levels = [ping.group_index for ping in groupings]
12231234
levels.append(new_index)
12241235

12251236
# construct labels
@@ -1237,7 +1248,7 @@ def _make_concat_multiindex(indexes, keys, groupings):
12371248

12381249
# last labels for the new level
12391250
labels.append(np.tile(np.arange(n), len(indexes)))
1240-
return MultiIndex(levels=levels, labels=labels)
1251+
return MultiIndex(levels=levels, labels=labels, names=names)
12411252

12421253
def _all_indexes_same(indexes):
12431254
first = indexes[0]

pandas/tests/test_groupby.py

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,19 @@ def setUp(self):
6262
self.mframe = DataFrame(np.random.randn(10, 3), index=index,
6363
columns=['A', 'B', 'C'])
6464

65+
self.three_group = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
66+
'bar', 'bar', 'bar', 'bar',
67+
'foo', 'foo', 'foo'],
68+
'B' : ['one', 'one', 'one', 'two',
69+
'one', 'one', 'one', 'two',
70+
'two', 'two', 'one'],
71+
'C' : ['dull', 'dull', 'shiny', 'dull',
72+
'dull', 'shiny', 'shiny', 'dull',
73+
'shiny', 'shiny', 'shiny'],
74+
'D' : np.random.randn(11),
75+
'E' : np.random.randn(11),
76+
'F' : np.random.randn(11)})
77+
6578
def test_basic(self):
6679
data = Series(np.arange(9) // 3, index=np.arange(9))
6780

@@ -686,25 +699,46 @@ def test_omit_nuisance(self):
686699
result = self.assertRaises(TypeError, grouped.agg, np.sum)
687700

688701
def test_omit_nuisance_python_multiple(self):
689-
data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
690-
'bar', 'bar', 'bar', 'bar',
691-
'foo', 'foo', 'foo'],
692-
'B' : ['one', 'one', 'one', 'two',
693-
'one', 'one', 'one', 'two',
694-
'two', 'two', 'one'],
695-
'C' : ['dull', 'dull', 'shiny', 'dull',
696-
'dull', 'shiny', 'shiny', 'dull',
697-
'shiny', 'shiny', 'shiny'],
698-
'D' : np.random.randn(11),
699-
'E' : np.random.randn(11),
700-
'F' : np.random.randn(11)})
701-
702-
grouped = data.groupby(['A', 'B'])
702+
grouped = self.three_group.groupby(['A', 'B'])
703703

704704
agged = grouped.agg(np.mean)
705705
exp = grouped.mean()
706706
assert_frame_equal(agged, exp)
707707

708+
def test_apply_concat_preserve_names(self):
709+
grouped = self.three_group.groupby(['A', 'B'])
710+
711+
def desc(group):
712+
result = group.describe()
713+
result.index.name = 'stat'
714+
return result
715+
716+
def desc2(group):
717+
result = group.describe()
718+
result.index.name = 'stat'
719+
result = result[:len(group)]
720+
# weirdo
721+
return result
722+
723+
def desc3(group):
724+
result = group.describe()
725+
726+
# names are different
727+
result.index.name = 'stat_%d' % len(group)
728+
729+
result = result[:len(group)]
730+
# weirdo
731+
return result
732+
733+
result = grouped.apply(desc)
734+
self.assertEquals(result.index.names, ['A', 'B', 'stat'])
735+
736+
result2 = grouped.apply(desc2)
737+
self.assertEquals(result2.index.names, ['A', 'B', 'stat'])
738+
739+
result3 = grouped.apply(desc3)
740+
self.assertEquals(result3.index.names, ['A', 'B', None])
741+
708742
def test_nonsense_func(self):
709743
df = DataFrame([0])
710744
self.assertRaises(Exception, df.groupby, lambda x: x + 'foo')

0 commit comments

Comments
 (0)