Skip to content

Commit f4c5797

Browse files
authored
PERF: groupby.transform broadcast perf (#45708)
1 parent 6a2272b commit f4c5797

File tree

3 files changed

+11
-1
lines changed

3 files changed

+11
-1
lines changed

asv_bench/benchmarks/groupby.py

+9
Original file line number | Diff line number | Diff line change
@@ -741,6 +741,12 @@ def setup(self):
741741
index=np.random.choice(range(10), n),
742742
)
743743

744+
n = 1_000_000
745+
self.df_tall = DataFrame(
746+
np.random.randn(n, 3),
747+
index=np.random.randint(0, 5, n),
748+
)
749+
744750
n = 20000
745751
self.df1 = DataFrame(
746752
np.random.randint(1, n, (n, 3)), columns=["jim", "joe", "jolie"]
@@ -760,6 +766,9 @@ def time_transform_lambda_max(self):
760766
def time_transform_ufunc_max(self):
761767
self.df.groupby(level="lev1").transform(np.max)
762768

769+
def time_transform_lambda_max_tall(self):
770+
self.df_tall.groupby(level=0).transform(lambda x: np.max(x, axis=0))
771+
763772
def time_transform_lambda_max_wide(self):
764773
self.df_wide.groupby(level=0).transform(lambda x: np.max(x, axis=0))
765774

doc/source/whatsnew/v1.5.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,7 @@ Performance improvements
202202
~~~~~~~~~~~~~~~~~~~~~~~~
203203
- Performance improvement in :meth:`.GroupBy.transform` for some user-defined DataFrame -> Series functions (:issue:`45387`)
204204
- Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`)
205+
- Performance improvement in :meth:`.GroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`)
205206
-
206207

207208
.. ---------------------------------------------------------------------------

pandas/core/groupby/generic.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1795,7 +1795,7 @@ def _wrap_transform_general_frame(
17951795
res_frame.index = group.index
17961796
else:
17971797
res_frame = obj._constructor(
1798-
np.concatenate([res.values] * len(group.index)).reshape(group.shape),
1798+
np.tile(res.values, (len(group.index), 1)),
17991799
columns=group.columns,
18001800
index=group.index,
18011801
)

0 commit comments

Comments
 (0)