Skip to content

Commit e5923d0

Browse files
committed
BUG: Bug in DataFrameGroupby.transform when transforming with a passed non-sorted key (GH8046)
1 parent 14ac5df commit e5923d0

File tree

3 files changed

+42
-17
lines changed

3 files changed

+42
-17
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ Bug Fixes
465465
when matching block and manager items, when there's only one block there's no ambiguity (:issue:`7794`)
466466

467467
- Bug in HDFStore iteration when passing a where (:issue:`8014`)
468-
468+
- Bug in ``DataFrameGroupBy.transform`` when transforming with a passed non-sorted key; the result now preserves the input order (:issue:`8046`)
469469
- Bug in repeated timeseries line and area plot may result in ``ValueError`` or incorrect kind (:issue:`7733`)
470470

471471

pandas/core/groupby.py

+20-15
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,24 @@ def _set_selection_from_grouper(self):
475475
if len(groupers):
476476
self._group_selection = (ax-Index(groupers)).tolist()
477477

478+
def _set_result_index_ordered(self, result):
479+
# set the result index on the passed values object
480+
# return the new object
481+
# related GH 8046
482+
483+
# the values/counts are repeated according to the group index
484+
indices = self.indices
485+
486+
# shortcut if we have an already ordered grouper: reordering is only needed when the group labels are not monotonic
487+
488+
if not Index(self.grouper.group_info[0]).is_monotonic:
489+
index = Index(np.concatenate([ indices[v] for v in self.grouper.result_index ]))
490+
result.index = index
491+
result = result.sort_index()
492+
493+
result.index = self.obj.index
494+
return result
495+
478496
def _local_dir(self):
479497
return sorted(set(self.obj._local_dir() + list(self._apply_whitelist)))
480498

@@ -2087,7 +2105,6 @@ def _convert_grouper(axis, grouper):
20872105
else:
20882106
return grouper
20892107

2090-
20912108
class SeriesGroupBy(GroupBy):
20922109
_apply_whitelist = _series_apply_whitelist
20932110

@@ -2319,18 +2336,7 @@ def _transform_fast(self, func):
23192336
counts = self.count().values
23202337
values = np.repeat(values, com._ensure_platform_int(counts))
23212338

2322-
# the values/counts are repeated according to the group index
2323-
indices = self.indices
2324-
2325-
# shortcut of we have an already ordered grouper
2326-
if Index(self.grouper.group_info[0]).is_monotonic:
2327-
result = Series(values, index=self.obj.index)
2328-
else:
2329-
index = Index(np.concatenate([ indices[v] for v in self.grouper.result_index ]))
2330-
result = Series(values, index=index).sort_index()
2331-
result.index = self.obj.index
2332-
2333-
return result
2339+
return self._set_result_index_ordered(Series(values))
23342340

23352341
def filter(self, func, dropna=True, *args, **kwargs):
23362342
"""
@@ -2842,8 +2848,7 @@ def _transform_general(self, func, *args, **kwargs):
28422848
concat_index = obj.columns if self.axis == 0 else obj.index
28432849
concatenated = concat(applied, join_axes=[concat_index],
28442850
axis=self.axis, verify_integrity=False)
2845-
concatenated.sort_index(inplace=True)
2846-
return concatenated
2851+
return self._set_result_index_ordered(concatenated)
28472852

28482853
def transform(self, func, *args, **kwargs):
28492854
"""

pandas/tests/test_groupby.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,26 @@ def test_transform(self):
796796
transformed = grouped.transform(lambda x: x * x.sum())
797797
self.assertEqual(transformed[7], 12)
798798

799+
# GH 8046
800+
# make sure that we preserve the input order
801+
802+
df = DataFrame(np.arange(6,dtype='int64').reshape(3,2), columns=["a","b"], index=[0,2,1])
803+
key = [0,0,1]
804+
expected = df.sort_index().groupby(key).transform(lambda x: x-x.mean()).groupby(key).mean()
805+
result = df.groupby(key).transform(lambda x: x-x.mean()).groupby(key).mean()
806+
assert_frame_equal(result, expected)
807+
808+
def demean(arr):
809+
return arr - arr.mean()
810+
811+
people = DataFrame(np.random.randn(5, 5),
812+
columns=['a', 'b', 'c', 'd', 'e'],
813+
index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])
814+
key = ['one', 'two', 'one', 'two', 'one']
815+
result = people.groupby(key).transform(demean).groupby(key).mean()
816+
expected = people.groupby(key).apply(demean).groupby(key).mean()
817+
assert_frame_equal(result, expected)
818+
799819
def test_transform_fast(self):
800820

801821
df = DataFrame( { 'id' : np.arange( 100000 ) / 3,
@@ -2924,7 +2944,7 @@ def __call__(self, x):
29242944
lambda x: sum(x),
29252945
lambda x: x.sum(),
29262946
partial(sum), fn_class()]
2927-
2947+
29282948
expected = df.groupby("foo").agg(sum)
29292949
for ecall in equiv_callables:
29302950
result = df.groupby('foo').agg(ecall)

0 commit comments

Comments
 (0)