Skip to content

Commit de10e72

Browse files
authored
BUG: GroupBy().fillna() performance regression (#37149)
1 parent 87429a6 commit de10e72

File tree

3 files changed

+23
-2
lines changed

3 files changed

+23
-2
lines changed

asv_bench/benchmarks/groupby.py

+20
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,26 @@ def time_category_size(self):
358358
self.draws.groupby(self.cats).size()
359359

360360

361+
class FillNA:
# Benchmark class that times groupby(...).fillna(...) with forward and
# backward fill, both on the whole frame and on the single "value" column.
362+
def setup(self):
# Builds self.df with 2 * N rows: "group" is N ones followed by N twos and
# "value" alternates NaN and 1.0; "group" is then moved into the index.
# NOTE(review): presumably run by the benchmark harness before the time_*
# methods -- confirm against the harness documentation.
363+
N = 100
364+
self.df = DataFrame(
365+
{"group": [1] * N + [2] * N, "value": [np.nan, 1.0] * N}
366+
).set_index("group")
367+
368+
def time_df_ffill(self):
# Forward fill on the whole frame, grouped by the "group" key.
369+
self.df.groupby("group").fillna(method="ffill")
370+
371+
def time_df_bfill(self):
# Backward fill on the whole frame, grouped by the "group" key.
372+
self.df.groupby("group").fillna(method="bfill")
373+
374+
def time_srs_ffill(self):
# Forward fill on the "value" column only, grouped by the "group" key.
375+
self.df.groupby("group")["value"].fillna(method="ffill")
376+
377+
def time_srs_bfill(self):
# Backward fill on the "value" column only, grouped by the "group" key.
378+
self.df.groupby("group")["value"].fillna(method="bfill")
379+
380+
361381
class GroupByMethods:
362382

363383
param_names = ["dtype", "method", "application"]

doc/source/whatsnew/v1.1.4.rst

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Bug fixes
2929
~~~~~~~~~
3030
- Bug causing ``groupby(...).sum()`` and similar to not preserve metadata (:issue:`29442`)
3131
- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` raising a ``ValueError`` when the target was read-only (:issue:`37174`)
32+
- Bug in :meth:`GroupBy.fillna` that introduced a performance regression after 1.0.5 (:issue:`36757`)
3233

3334
.. ---------------------------------------------------------------------------
3435

pandas/core/groupby/groupby.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1196,12 +1196,12 @@ def reset_identity(values):
11961196
# when the ax has duplicates
11971197
# so we resort to this
11981198
# GH 14776, 30667
1199-
if ax.has_duplicates:
1199+
if ax.has_duplicates and not result.axes[self.axis].equals(ax):
12001200
indexer, _ = result.index.get_indexer_non_unique(ax.values)
12011201
indexer = algorithms.unique1d(indexer)
12021202
result = result.take(indexer, axis=self.axis)
12031203
else:
1204-
result = result.reindex(ax, axis=self.axis)
1204+
result = result.reindex(ax, axis=self.axis, copy=False)
12051205

12061206
elif self.group_keys:
12071207

0 commit comments

Comments
 (0)