Skip to content

Commit 2e4da9b

Browse files
Sangmin Park, jreback
Sangmin Park
authored and committed
ERR: Better error reporting with .transform and an invalid output
closes #10165 closes #12474
1 parent 91967c8 commit 2e4da9b

File tree

4 files changed

+19
-4
lines changed

4 files changed

+19
-4
lines changed

doc/source/whatsnew/v0.18.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,7 @@ Other API Changes
865865
- ``.to_latex`` and ``.to_html`` gain a ``decimal`` parameter like ``.to_csv``; the default is ``'.'`` (:issue:`12031`)
866866
- More helpful error message when constructing a ``DataFrame`` with empty data but with indices (:issue:`8020`)
867867
- ``.describe()`` will now properly handle bool dtype as a categorical (:issue:`6625`)
868+
- More helpful error message for an invalid ``.transform`` with user-defined input (:issue:`10165`)
868869

869870
.. _whatsnew_0180.deprecations:
870871

doc/source/whatsnew/v0.18.1.txt

-1
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,3 @@ Performance Improvements
4343

4444
Bug Fixes
4545
~~~~~~~~~
46-

pandas/core/groupby.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3346,9 +3346,9 @@ def _transform_general(self, func, *args, **kwargs):
33463346
path, res = self._choose_path(fast_path, slow_path, group)
33473347
except TypeError:
33483348
return self._transform_item_by_item(obj, fast_path)
3349-
except Exception: # pragma: no cover
3350-
res = fast_path(group)
3351-
path = fast_path
3349+
except ValueError:
3350+
msg = 'transform must return a scalar value for each group'
3351+
raise ValueError(msg)
33523352
else:
33533353
res = path(group)
33543354

pandas/tests/test_groupby.py

+15
Original file line numberDiff line numberDiff line change
@@ -6104,6 +6104,21 @@ def test_nunique_with_object(self):
61046104
expected = pd.Series([1] * 5, name='name', index=index)
61056105
tm.assert_series_equal(result, expected)
61066106

6107+
def test_transform_with_non_scalar_group(self):
6108+
# GH 10165
6109+
cols = pd.MultiIndex.from_tuples([
6110+
('syn', 'A'), ('mis', 'A'), ('non', 'A'),
6111+
('syn', 'C'), ('mis', 'C'), ('non', 'C'),
6112+
('syn', 'T'), ('mis', 'T'), ('non', 'T'),
6113+
('syn', 'G'), ('mis', 'G'), ('non', 'G')])
6114+
df = pd.DataFrame(np.random.randint(1, 10, (4, 12)),
6115+
columns=cols,
6116+
index=['A', 'C', 'G', 'T'])
6117+
self.assertRaisesRegexp(ValueError, 'transform must return a scalar '
6118+
'value for each group.*', df.groupby
6119+
(axis=1, level=1).transform,
6120+
lambda z: z.div(z.sum(axis=1), axis=0))
6121+
61076122

61086123
def assert_fp_equal(a, b):
61096124
assert (np.abs(a - b) < 1e-12).all()

0 commit comments

Comments
 (0)