Skip to content

Commit ffee51f

Browse files
topper-123 authored and SeeminSyed committed
PERF: copy cached attributes on index shallow_copy (pandas-dev#32568)
1 parent 102db32 commit ffee51f

File tree

3 files changed

+12
-10
lines changed

3 files changed

+12
-10
lines changed

doc/source/whatsnew/v1.1.0.rst

+3 -1
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,9 @@ Performance improvements
188188
- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`)
189189
- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`)
190190
- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
191-
-
191+
- The internal :meth:`Index._shallow_copy` now copies cached attributes over to the new index,
192+
avoiding creating these again on the new index. This can speed up many operations
193+
that depend on creating copies of existing indexes (:issue:`28584`)
192194

193195
.. ---------------------------------------------------------------------------
194196

pandas/core/indexes/base.py

+7 -3
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from datetime import datetime
22
import operator
33
from textwrap import dedent
4-
from typing import TYPE_CHECKING, Any, FrozenSet, Hashable, Union
4+
from typing import TYPE_CHECKING, Any, Dict, FrozenSet, Hashable, Union
55
import warnings
66

77
import numpy as np
@@ -250,6 +250,7 @@ def _outer_indexer(self, left, right):
250250

251251
_typ = "index"
252252
_data: Union[ExtensionArray, np.ndarray]
253+
_cache: Dict[str, Any]
253254
_id = None
254255
_name: Label = None
255256
# MultiIndex.levels previously allowed setting the index name. We
@@ -468,6 +469,7 @@ def _simple_new(cls, values, name: Label = None):
468469
# we actually set this value too.
469470
result._index_data = values
470471
result._name = name
472+
result._cache = {}
471473

472474
return result._reset_identity()
473475

@@ -498,11 +500,13 @@ def _shallow_copy(self, values=None, name: Label = no_default):
498500
name : Label, defaults to self.name
499501
"""
500502
name = self.name if name is no_default else name
501-
503+
cache = self._cache.copy() if values is None else {}
502504
if values is None:
503505
values = self.values
504506

505-
return self._simple_new(values, name=name)
507+
result = self._simple_new(values, name=name)
508+
result._cache = cache
509+
return result
506510

507511
def _shallow_copy_with_infer(self, values, **kwargs):
508512
"""

pandas/core/indexes/numeric.py

+2 -6
Original file line number | Diff line number | Diff line change
@@ -104,15 +104,11 @@ def _maybe_cast_slice_bound(self, label, side, kind):
104104

105105
@Appender(Index._shallow_copy.__doc__)
106106
def _shallow_copy(self, values=None, name: Label = lib.no_default):
107-
name = name if name is not lib.no_default else self.name
108-
109107
if values is not None and not self._can_hold_na and values.dtype.kind == "f":
108+
name = self.name if name is lib.no_default else name
110109
# Ensure we are not returning an Int64Index with float data:
111110
return Float64Index._simple_new(values, name=name)
112-
113-
if values is None:
114-
values = self.values
115-
return type(self)._simple_new(values, name=name)
111+
return super()._shallow_copy(values=values, name=name)
116112

117113
def _convert_for_op(self, value):
118114
"""

0 commit comments

Comments
 (0)