Skip to content

Commit 3b84673

Browse files
jschendel authored and quintusdias committed
PERF: Improve performance of cut with IntervalIndex bins (pandas-dev#27669)
1 parent db3645a commit 3b84673

File tree

3 files changed

+11
-2
lines changed

3 files changed

+11
-2
lines changed

asv_bench/benchmarks/reshape.py

+9
Original file line number | Diff line number | Diff line change
@@ -214,6 +214,7 @@ def setup(self, bins):
214214
self.datetime_series = pd.Series(
215215
np.random.randint(N, size=N), dtype="datetime64[ns]"
216216
)
217+
self.interval_bins = pd.IntervalIndex.from_breaks(np.linspace(0, N, bins))
217218

218219
def time_cut_int(self, bins):
219220
pd.cut(self.int_series, bins)
@@ -239,6 +240,14 @@ def time_qcut_timedelta(self, bins):
239240
def time_qcut_datetime(self, bins):
240241
pd.qcut(self.datetime_series, bins)
241242

243+
def time_cut_interval(self, bins):
244+
# GH 27668
245+
pd.cut(self.int_series, self.interval_bins)
246+
247+
def peakmem_cut_interval(self, bins):
248+
# GH 27668
249+
pd.cut(self.int_series, self.interval_bins)
250+
242251

243252
class Explode:
244253
param_names = ["n_rows", "max_list_length"]

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ Performance improvements
7171

7272
- Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`)
7373
- Performance improvement in `MultiIndex.is_monotonic` (:issue:`27495`)
74+
- Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`)
7475

7576

7677
.. _whatsnew_1000.bug_fixes:

pandas/core/reshape/tile.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -373,8 +373,7 @@ def _bins_to_cuts(
373373
if isinstance(bins, IntervalIndex):
374374
# we have a fast-path here
375375
ids = bins.get_indexer(x)
376-
result = algos.take_nd(bins, ids)
377-
result = Categorical(result, categories=bins, ordered=True)
376+
result = Categorical.from_codes(ids, categories=bins, ordered=True)
378377
return result, bins
379378

380379
unique_bins = algos.unique(bins)

0 commit comments

Comments
 (0)