Skip to content

Commit d5b7c93

Browse files
eoveson authored and Pingviinituutti committed
PERF: For GH23814, return early in Categorical.__init__ (pandas-dev#23888)
1 parent edaf513 commit d5b7c93

File tree

3 files changed

+19
-1
lines changed

3 files changed

+19
-1
lines changed

asv_bench/benchmarks/categoricals.py

+8
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ def setup(self):
4646
self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
4747
self.values_all_nan = [np.nan] * len(self.values)
4848
self.values_all_int8 = np.ones(N, 'int8')
49+
self.categorical = pd.Categorical(self.values, self.categories)
50+
self.series = pd.Series(self.categorical)
4951

5052
def time_regular(self):
5153
pd.Categorical(self.values, self.categories)
@@ -68,6 +70,12 @@ def time_all_nan(self):
6870
def time_from_codes_all_int8(self):
6971
pd.Categorical.from_codes(self.values_all_int8, self.categories)
7072

73+
def time_existing_categorical(self):
74+
pd.Categorical(self.categorical)
75+
76+
def time_existing_series(self):
77+
pd.Categorical(self.series)
78+
7179

7280
class ValueCounts(object):
7381

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1229,7 +1229,7 @@ Performance Improvements
12291229
- Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`)
12301230
- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`)
12311231
- Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`)
1232-
1232+
- Improved performance of :class:`Categorical` constructor for `Series` objects (:issue:`23814`)
12331233

12341234
.. _whatsnew_0240.docs:
12351235

pandas/core/arrays/categorical.py

+10
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,16 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
347347
# the "ordered" and "categories" arguments
348348
dtype = values.dtype._from_categorical_dtype(values.dtype,
349349
categories, ordered)
350+
351+
# GH23814, for perf, if values._values already an instance of
352+
# Categorical, set values to codes, and run fastpath
353+
if (isinstance(values, (ABCSeries, ABCIndexClass)) and
354+
isinstance(values._values, type(self))):
355+
values = values._values.codes.copy()
356+
if categories is None:
357+
categories = dtype.categories
358+
fastpath = True
359+
350360
else:
351361
# If dtype=None and values is not categorical, create a new dtype
352362
dtype = CategoricalDtype(categories, ordered)

0 commit comments

Comments
 (0)