Skip to content

Commit 6531206

Browse files
committed
BUG: Bug in groupby.transform(..) when axis=1 is specified with a non-monotonic ordered index
closes #12713
1 parent 247fe07 commit 6531206

File tree

3 files changed

+47
-8
lines changed

3 files changed

+47
-8
lines changed

doc/source/whatsnew/v0.18.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ Performance Improvements
118118
Bug Fixes
119119
~~~~~~~~~
120120
- ``usecols`` parameter in ``pd.read_csv`` is now respected even when the lines of a CSV file are not even (:issue:`12203`)
121-
121+
- Bug in ``groupby.transform(..)`` when ``axis=1`` is specified with a non-monotonic ordered index, where the result labels were assigned and sorted along the row index instead of the grouped axis (:issue:`12713`)
122122
- Bug in ``Period`` and ``PeriodIndex`` creation raises ``KeyError`` if ``freq="Minute"`` is specified. Note that "Minute" freq is deprecated in v0.17.0, and recommended to use ``freq="T"`` instead (:issue:`11854`)
123123
- Bug in printing data which contains ``Period`` with different ``freq`` raises ``ValueError`` (:issue:`12615`)
124124
- Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`)

pandas/core/groupby.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -457,19 +457,18 @@ def _set_selection_from_grouper(self):
457457
self._group_selection = ax.difference(Index(groupers)).tolist()
458458

459459
def _set_result_index_ordered(self, result):
460-
# set the result index on the passed values object
461-
# return the new object
462-
# related 8046
460+
# set the result index on the passed values object and
461+
# return the new object, xref 8046
463462

464463
# the values/counts are repeated according to the group index
465-
# shortcut of we have an already ordered grouper
464+
# shortcut if we have an already ordered grouper
466465
if not self.grouper.is_monotonic:
467466
index = Index(np.concatenate(
468467
self._get_indices(self.grouper.result_index)))
469-
result.index = index
470-
result = result.sort_index()
468+
result.set_axis(self.axis, index)
469+
result = result.sort_index(axis=self.axis)
471470

472-
result.index = self.obj.index
471+
result.set_axis(self.axis, self.obj._get_axis(self.axis))
473472
return result
474473

475474
def _dir_additions(self):

pandas/tests/test_groupby.py

+40
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,46 @@ def test_transform_broadcast(self):
10901090
for idx in gp.index:
10911091
assert_fp_equal(res.xs(idx), agged[idx])
10921092

1093+
def test_transform_axis(self):
1094+
1095+
# make sure that we are setting the axes
1096+
# correctly when on axis=0 or 1
1097+
# in the presence of a non-monotonic indexer
1098+
# GH12713
1099+
1100+
base = self.tsframe.iloc[0:5]
1101+
r = len(base.index)
1102+
c = len(base.columns)
1103+
tso = DataFrame(np.random.randn(r, c),
1104+
index=base.index,
1105+
columns=base.columns,
1106+
dtype='float64')
1107+
# monotonic
1108+
ts = tso
1109+
grouped = ts.groupby(lambda x: x.weekday())
1110+
result = ts - grouped.transform('mean')
1111+
expected = grouped.apply(lambda x: x - x.mean())
1112+
assert_frame_equal(result, expected)
1113+
1114+
ts = ts.T
1115+
grouped = ts.groupby(lambda x: x.weekday(), axis=1)
1116+
result = ts - grouped.transform('mean')
1117+
expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
1118+
assert_frame_equal(result, expected)
1119+
1120+
# non-monotonic
1121+
ts = tso.iloc[[1, 0] + list(range(2, len(base)))]
1122+
grouped = ts.groupby(lambda x: x.weekday())
1123+
result = ts - grouped.transform('mean')
1124+
expected = grouped.apply(lambda x: x - x.mean())
1125+
assert_frame_equal(result, expected)
1126+
1127+
ts = ts.T
1128+
grouped = ts.groupby(lambda x: x.weekday(), axis=1)
1129+
result = ts - grouped.transform('mean')
1130+
expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
1131+
assert_frame_equal(result, expected)
1132+
10931133
def test_transform_dtype(self):
10941134
# GH 9807
10951135
# Check transform dtype output is preserved

0 commit comments

Comments
 (0)