Skip to content

Commit 9060d88

Browse files
authored
PERF: copy cached attributes on extension index shallow_copy (#32640)
1 parent ec77341 commit 9060d88

File tree

7 files changed

+24
-18
lines changed

7 files changed

+24
-18
lines changed

doc/source/whatsnew/v1.1.0.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,9 @@ Backwards incompatible API changes
8989
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9090
- :meth:`DataFrame.swaplevel` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`.
9191
Previously an ``AttributeError`` was raised (:issue:`31126`)
92-
- :meth:`DataFrameGroupby.mean` and :meth:`SeriesGroupby.mean` (and similarly for :meth:`~DataFrameGroupby.median`, :meth:`~DataFrameGroupby.std`` and :meth:`~DataFrameGroupby.var``)
92+
- :meth:`DataFrameGroupby.mean` and :meth:`SeriesGroupby.mean` (and similarly for :meth:`~DataFrameGroupby.median`, :meth:`~DataFrameGroupby.std` and :meth:`~DataFrameGroupby.var`)
9393
now raise a ``TypeError`` if an unsupported keyword argument is passed to them.
94-
Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median``) (:issue:`31485`)
94+
Previously an ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` was passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`)
9595
- :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`)
9696
- Passing an integer dtype other than ``int64`` to ``np.array(period_index, dtype=...)`` will now raise ``TypeError`` instead of incorrectly using ``int64`` (:issue:`32255`)
9797
-
@@ -188,9 +188,9 @@ Performance improvements
188188
- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`)
189189
- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`)
190190
- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
191-
- The internal :meth:`Index._shallow_copy` now copies cached attributes over to the new index,
192-
avoiding creating these again on the new index. This can speed up many operations
193-
that depend on creating copies of existing indexes (:issue:`28584`)
191+
- The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index,
192+
avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of
193+
existing indexes (:issue:`28584`, :issue:`32640`)
194194

195195
.. ---------------------------------------------------------------------------
196196

pandas/core/indexes/category.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ def _simple_new(cls, values: Categorical, name: Label = None):
233233

234234
result._data = values
235235
result.name = name
236+
result._cache = {}
236237

237238
result._reset_identity()
238239
result._no_setting_name = False
@@ -242,14 +243,9 @@ def _simple_new(cls, values: Categorical, name: Label = None):
242243

243244
@Appender(Index._shallow_copy.__doc__)
244245
def _shallow_copy(self, values=None, name: Label = no_default):
245-
name = self.name if name is no_default else name
246-
247-
if values is None:
248-
values = self.values
249-
250-
cat = Categorical(values, dtype=self.dtype)
251-
252-
return type(self)._simple_new(cat, name=name)
246+
if values is not None:
247+
values = Categorical(values, dtype=self.dtype)
248+
return super()._shallow_copy(values=values, name=name)
253249

254250
def _is_dtype_compat(self, other) -> bool:
255251
"""

pandas/core/indexes/datetimelike.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,7 @@ def _set_freq(self, freq):
617617

618618
def _shallow_copy(self, values=None, name: Label = lib.no_default):
619619
name = self.name if name is lib.no_default else name
620+
cache = self._cache.copy() if values is None else {}
620621

621622
if values is None:
622623
values = self._data
@@ -635,7 +636,9 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default):
635636
del attributes["freq"]
636637

637638
attributes["name"] = name
638-
return type(self)._simple_new(values, **attributes)
639+
result = self._simple_new(values, **attributes)
640+
result._cache = cache
641+
return result
639642

640643
# --------------------------------------------------------------------
641644
# Set Operation Methods

pandas/core/indexes/datetimes.py

+1
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
268268
result = object.__new__(cls)
269269
result._data = dtarr
270270
result.name = name
271+
result._cache = {}
271272
result._no_setting_name = False
272273
# For groupby perf. See note in indexes/base about _index_data
273274
result._index_data = dtarr._data

pandas/core/indexes/interval.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ def _simple_new(cls, array: IntervalArray, name: Label = None):
243243
result = IntervalMixin.__new__(cls)
244244
result._data = array
245245
result.name = name
246+
result._cache = {}
246247
result._no_setting_name = False
247248
result._reset_identity()
248249
return result
@@ -332,12 +333,15 @@ def from_tuples(
332333
# --------------------------------------------------------------------
333334

334335
@Appender(Index._shallow_copy.__doc__)
335-
def _shallow_copy(self, values=None, **kwargs):
336+
def _shallow_copy(self, values=None, name: Label = lib.no_default):
337+
name = self.name if name is lib.no_default else name
338+
cache = self._cache.copy() if values is None else {}
336339
if values is None:
337340
values = self._data
338-
attributes = self._get_attributes_dict()
339-
attributes.update(kwargs)
340-
return self._simple_new(values, **attributes)
341+
342+
result = self._simple_new(values, name=name)
343+
result._cache = cache
344+
return result
341345

342346
@cache_readonly
343347
def _isnan(self):

pandas/core/indexes/period.py

+1
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ def _simple_new(cls, values: PeriodArray, name: Label = None):
233233
# For groupby perf. See note in indexes/base about _index_data
234234
result._index_data = values._data
235235
result.name = name
236+
result._cache = {}
236237
result._reset_identity()
237238
return result
238239

pandas/core/indexes/timedeltas.py

+1
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):
180180
result = object.__new__(cls)
181181
result._data = values
182182
result._name = name
183+
result._cache = {}
183184
# For groupby perf. See note in indexes/base about _index_data
184185
result._index_data = values._data
185186

0 commit comments

Comments
 (0)