Skip to content

Commit 36f9052

Browse files
TomAugspurger authored and jreback committed
PERF: Correct signature for group_nth / group_object (#19579)
1 parent b2940a0 commit 36f9052

File tree

3 files changed: +25 additions, -2 deletions

asv_bench/benchmarks/groupby.py

+16
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,22 @@ def time_series_nth(self, df):
160160
df[1].groupby(df[0]).nth(0)
161161

162162

163+
class NthObject(object):
164+
165+
goal_time = 0.2
166+
167+
def setup_cache(self):
168+
df = DataFrame(np.random.randint(1, 100, (10000,)), columns=['g'])
169+
df['obj'] = ['a'] * 5000 + ['b'] * 5000
170+
return df
171+
172+
def time_nth(self, df):
173+
df.groupby('g').nth(5)
174+
175+
def time_nth_last(self, df):
176+
df.groupby('g').last()
177+
178+
163179
class DateAttributes(object):
164180

165181
goal_time = 0.2

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -746,6 +746,7 @@ Groupby/Resample/Rolling
746746
- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
747747
- Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`)
748748
- Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
749+
- Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
749750

750751
Sparse
751752
^^^^^^

pandas/_libs/groupby.pyx

+8 -2
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,8 @@ def group_nth_object(ndarray[object, ndim=2] out,
3636
ndarray[int64_t] counts,
3737
ndarray[object, ndim=2] values,
3838
ndarray[int64_t] labels,
39-
int64_t rank):
39+
int64_t rank,
40+
Py_ssize_t min_count=-1):
4041
"""
4142
Only aggregates on axis=0
4243
"""
@@ -47,6 +48,8 @@ def group_nth_object(ndarray[object, ndim=2] out,
4748
ndarray[int64_t, ndim=2] nobs
4849
ndarray[object, ndim=2] resx
4950

51+
assert min_count == -1, "'min_count' only used in add and prod"
52+
5053
nobs = np.zeros((<object> out).shape, dtype=np.int64)
5154
resx = np.empty((<object> out).shape, dtype=object)
5255

@@ -80,7 +83,8 @@ def group_nth_object(ndarray[object, ndim=2] out,
8083
def group_last_object(ndarray[object, ndim=2] out,
8184
ndarray[int64_t] counts,
8285
ndarray[object, ndim=2] values,
83-
ndarray[int64_t] labels):
86+
ndarray[int64_t] labels,
87+
Py_ssize_t min_count=-1):
8488
"""
8589
Only aggregates on axis=0
8690
"""
@@ -91,6 +95,8 @@ def group_last_object(ndarray[object, ndim=2] out,
9195
ndarray[object, ndim=2] resx
9296
ndarray[int64_t, ndim=2] nobs
9397

98+
assert min_count == -1, "'min_count' only used in add and prod"
99+
94100
nobs = np.zeros((<object> out).shape, dtype=np.int64)
95101
resx = np.empty((<object> out).shape, dtype=object)
96102

0 commit comments

Comments
 (0)