Skip to content

Commit f99f050

Browse files
committed
BUG: incorrect broadcasting that could cause dtype coercion in a groupby-transform
closes #14457 Author: Jeff Reback <[email protected]> Closes #14466 from jreback/transform and squashes the following commits: ce595b9 [Jeff Reback] BUG: incorrect broadcasting that could cause dtype coercion in a groupby-transform
1 parent 4852008 commit f99f050

File tree

3 files changed

+31
-6
lines changed

3 files changed

+31
-6
lines changed

doc/source/whatsnew/v0.19.1.txt

+4
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,13 @@ Bug Fixes
4343
- Bug in string indexing against data with ``object`` ``Index`` may raise ``AttributeError`` (:issue:`14424`)
4444
- Correctly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`)
4545

46+
4647
- Bug in ``RangeIndex.intersection`` when result is an empty set (:issue:`14364`).
4748
- Bug in union of differences from a ``DatetimeIndex``; this is a regression in 0.19.0 from 0.18.1 (:issue:`14323`)
4849

50+
- Bug in groupby-transform broadcasting that could cause incorrect dtype coercion (:issue:`14457`)
51+
52+
4953
- Bug in ``Series.__setitem__`` which allowed mutating read-only arrays (:issue:`14359`).
5054

5155

pandas/core/groupby.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -3460,7 +3460,6 @@ def _transform_general(self, func, *args, **kwargs):
34603460
from pandas.tools.merge import concat
34613461

34623462
applied = []
3463-
34643463
obj = self._obj_with_exclusions
34653464
gen = self.grouper.get_iterator(obj, axis=self.axis)
34663465
fast_path, slow_path = self._define_paths(func, *args, **kwargs)
@@ -3481,14 +3480,24 @@ def _transform_general(self, func, *args, **kwargs):
34813480
else:
34823481
res = path(group)
34833482

3484-
# broadcasting
34853483
if isinstance(res, Series):
3486-
if res.index.is_(obj.index):
3487-
group.T.values[:] = res
3484+
3485+
# we need to broadcast across the
3486+
# other dimension; this will preserve dtypes
3487+
# GH14457
3488+
if not np.prod(group.shape):
3489+
continue
3490+
elif res.index.is_(obj.index):
3491+
r = concat([res] * len(group.columns), axis=1)
3492+
r.columns = group.columns
3493+
r.index = group.index
34883494
else:
3489-
group.values[:] = res
3495+
r = DataFrame(
3496+
np.concatenate([res.values] * len(group.index)
3497+
).reshape(group.shape),
3498+
columns=group.columns, index=group.index)
34903499

3491-
applied.append(group)
3500+
applied.append(r)
34923501
else:
34933502
applied.append(res)
34943503

pandas/tests/test_groupby.py

+12
Original file line numberDiff line numberDiff line change
@@ -1366,6 +1366,18 @@ def nsum(x):
13661366
for result in results:
13671367
assert_series_equal(result, expected, check_names=False)
13681368

1369+
def test_transform_coercion(self):
1370+
1371+
# 14457
1372+
# when we are transforming be sure to not coerce
1373+
# via assignment
1374+
df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1]))
1375+
g = df.groupby('A')
1376+
1377+
expected = g.transform(np.mean)
1378+
result = g.transform(lambda x: np.mean(x))
1379+
assert_frame_equal(result, expected)
1380+
13691381
def test_with_na(self):
13701382
index = Index(np.arange(10))
13711383

0 commit comments

Comments
 (0)