Skip to content

Commit c5e32b5

Browse files
authored
PERF: groupby.apply on a non-unique, unsorted index (#47234)
1 parent dcfba3c commit c5e32b5

File tree

3 files changed

+16
-2
lines changed

3 files changed

+16
-2
lines changed

asv_bench/benchmarks/groupby.py

+13
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pandas import (
88
Categorical,
99
DataFrame,
10+
Index,
1011
MultiIndex,
1112
Series,
1213
Timestamp,
@@ -111,6 +112,18 @@ def time_copy_overhead_single_col(self, factor):
111112
self.df.groupby("key").apply(self.df_copy_function)
112113

113114

115+
class ApplyNonUniqueUnsortedIndex:  # benchmark class in asv_bench/benchmarks/groupby.py
116+
def setup(self):
117+
# GH 46527: GroupBy.apply when grouping on a non-unique, unsorted index
118+
# unsorted (labels run 99 down to 0) and non-unique (each label repeated 200 times) index
119+
idx = np.arange(100)[::-1]
120+
idx = Index(np.repeat(idx, 200), name="key")  # named "key" so groupby("key") groups on the index
121+
self.df = DataFrame(np.random.randn(len(idx), 10), index=idx)  # 20,000 rows x 10 columns
122+
123+
def time_groupby_apply_non_unique_unsorted_index(self):
124+
self.df.groupby("key", group_keys=False).apply(lambda x: x)  # identity function: the applied callable itself does no work
125+
126+
114127
class Groups:
115128

116129
param_names = ["key"]

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ Performance improvements
692692
- Performance improvement in :meth:`.GroupBy.diff` (:issue:`16706`)
693693
- Performance improvement in :meth:`.GroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`)
694694
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`)
695+
- Performance improvement in :meth:`.GroupBy.apply` when grouping on a non-unique unsorted index (:issue:`46527`)
695696
- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`45681`, :issue:`46040`, :issue:`46330`)
696697
- Performance improvement in :attr:`MultiIndex.values` when the MultiIndex contains levels of type DatetimeIndex, TimedeltaIndex or ExtensionDtypes (:issue:`46288`)
697698
- Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`)

pandas/core/groupby/groupby.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1141,8 +1141,8 @@ def reset_identity(values):
11411141
# so we resort to this
11421142
# GH 14776, 30667
11431143
if ax.has_duplicates and not result.axes[self.axis].equals(ax):
1144-
indexer, _ = result.index.get_indexer_non_unique(ax._values)
1145-
indexer = algorithms.unique1d(indexer)
1144+
target = algorithms.unique1d(ax._values)
1145+
indexer, _ = result.index.get_indexer_non_unique(target)
11461146
result = result.take(indexer, axis=self.axis)
11471147
else:
11481148
result = result.reindex(ax, axis=self.axis, copy=False)

0 commit comments

Comments
 (0)