Skip to content

Commit 713fe11

Browse files
committed
Merge pull request #9699 from dsm054/fix-transform-length-mismatch
BUG: ensure we use group sizes, not group counts, in transform (GH9697)
2 parents 3a3429f + 8bae0d4 commit 713fe11

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

doc/source/whatsnew/v0.16.1.txt

+2
Original file line number | Diff line number | Diff line change
@@ -46,3 +46,5 @@ Performance Improvements
4646

4747
Bug Fixes
4848
~~~~~~~~~
49+
50+
- Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`)

pandas/core/groupby.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -2453,7 +2453,7 @@ def _transform_fast(self, func):
24532453
if isinstance(func, compat.string_types):
24542454
func = getattr(self,func)
24552455
values = func().values
2456-
counts = self.count().values
2456+
counts = self.size().values
24572457
values = np.repeat(values, com._ensure_platform_int(counts))
24582458

24592459
return self._set_result_index_ordered(Series(values))

pandas/tests/test_groupby.py

+13
Original file line number | Diff line number | Diff line change
@@ -1058,6 +1058,19 @@ def test_transform_function_aliases(self):
10581058
expected = self.df.groupby('A')['C'].transform(np.mean)
10591059
assert_series_equal(result, expected)
10601060

1061+
def test_transform_length(self):
1062+
# GH 9697
1063+
df = pd.DataFrame({'col1':[1,1,2,2], 'col2':[1,2,3,np.nan]})
1064+
expected = pd.Series([3.0]*4)
1065+
def nsum(x):
1066+
return np.nansum(x)
1067+
results = [df.groupby('col1').transform(sum)['col2'],
1068+
df.groupby('col1')['col2'].transform(sum),
1069+
df.groupby('col1').transform(nsum)['col2'],
1070+
df.groupby('col1')['col2'].transform(nsum)]
1071+
for result in results:
1072+
assert_series_equal(result, expected)
1073+
10611074
def test_with_na(self):
10621075
index = Index(np.arange(10))
10631076

0 commit comments

Comments
 (0)