Skip to content

Commit 82d73a9

Browse files
committed
BUG: fix GroupBy.apply bug, GH #237
1 parent 0113ed4 commit 82d73a9

File tree

2 files changed

+48
-11
lines changed

2 files changed

+48
-11
lines changed

pandas/core/groupby.py

+29-10
Original file line numberDiff line numberDiff line change
@@ -1069,22 +1069,35 @@ def transform(self, func, *args, **kwargs):
10691069
axis=self.axis)
10701070

10711071
def _concat_frames(frames, index, columns=None, axis=0):
    """
    Glue a list of DataFrames together along ``axis``.

    Parameters
    ----------
    frames : list of DataFrame
        Pieces to concatenate. A single-element list is returned as-is.
    index : Index
        Row labels the result is reindexed to. For ``axis != 0`` it is also
        used directly as the row labels of the concatenated frame.
    columns : Index or list, optional
        Column labels for the result when ``axis == 0``; defaults to the
        columns of the first frame.
    axis : int, default 0
        0 stacks the frames vertically (rows are appended), anything else
        places them side by side (columns are appended).

    Returns
    -------
    DataFrame
    """
    if len(frames) == 1:
        return frames[0]

    if axis == 0:
        new_index = _concat_indexes([x.index for x in frames])
        new_columns = frames[0].columns if columns is None else columns
    else:
        new_columns = _concat_indexes([x.columns for x in frames])
        new_index = index

    # Check every piece, not just the first: pushing a mixed-dtype frame
    # through ``.values`` would upcast all of its columns to a common dtype.
    if any(x._is_mixed_type for x in frames):
        new_data = {}
        if axis == 0:
            # Concatenate column by column so each column keeps its dtype.
            for col in new_columns:
                new_data[col] = np.concatenate([x[col].values
                                                for x in frames])
        else:
            # Side-by-side: each frame contributes its own columns; indexing
            # every frame by every output label would fail for frames that
            # do not carry that column.
            for frame in frames:
                for col in frame.columns:
                    new_data[col] = frame[col].values
        result = DataFrame(new_data, index=new_index, columns=new_columns)
    else:
        new_values = np.concatenate([x.values for x in frames], axis=axis)
        result = DataFrame(new_values, index=new_index, columns=new_columns)

    # Align both branches to the requested labels so the output ordering does
    # not depend on whether the pieces happened to be mixed-dtype.
    return result.reindex(index=index, columns=columns)
1094+
1095+
def _concat_indexes(indexes):
1096+
if len(indexes) == 1:
1097+
new_index = indexes[0]
1098+
else:
1099+
new_index = indexes[0].append(indexes[1:])
1100+
return new_index
10881101

10891102
def _concat_frames_hierarchical(frames, keys, groupings, axis=0):
10901103
if axis == 0:
@@ -1096,8 +1109,14 @@ def _concat_frames_hierarchical(frames, keys, groupings, axis=0):
10961109
new_columns = _make_concat_multiindex(all_columns, keys, groupings)
10971110
new_index = frames[0].index
10981111

1099-
new_values = np.concatenate([x.values for x in frames], axis=axis)
1100-
return DataFrame(new_values, index=new_index, columns=new_columns)
1112+
if frames[0]._is_mixed_type:
1113+
new_data = {}
1114+
for col in new_columns:
1115+
new_data[col] = np.concatenate([x[col].values for x in frames])
1116+
return DataFrame(new_data, index=new_index, columns=new_columns)
1117+
else:
1118+
new_values = np.concatenate([x.values for x in frames], axis=axis)
1119+
return DataFrame(new_values, index=new_index, columns=new_columns)
11011120

11021121
def _make_concat_multiindex(indexes, keys, groupings):
11031122
if not _all_indexes_same(indexes):

pandas/tests/test_groupby.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -872,7 +872,7 @@ def test_grouping_ndarray(self):
872872
expected = self.df.groupby('A').sum()
873873
assert_frame_equal(result, expected)
874874

875-
def test_apply_example(self):
875+
def test_apply_typecast_fail(self):
876876
df = DataFrame({'d' : [1.,1.,1.,2.,2.,2.],
877877
'c' : np.tile(['a','b','c'], 2),
878878
'v' : np.arange(1., 7.)})
@@ -889,6 +889,24 @@ def f(group):
889889

890890
assert_frame_equal(result, expected)
891891

892+
def test_apply_multiindex_fail(self):
    # GH #237: apply over a frame that has both a MultiIndex and mixed dtypes
    level_arrays = [[0, 0, 0, 1, 1, 1],
                    [1, 2, 3, 1, 2, 3]]
    index = MultiIndex.from_arrays(level_arrays)
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        # rescale 'v' to the [0, 1] range within each group
        v = group['v']
        group['v2'] = (v - v.min()) / (v.max() - v.min())
        return group

    result = df.groupby('d').apply(f)

    expected = df.copy()
    expected['v2'] = np.tile([0., 0.5, 1], 2)

    assert_frame_equal(result, expected)
892910

893911
class TestPanelGroupBy(unittest.TestCase):
894912

0 commit comments

Comments
 (0)