diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index e745bf3f5feed..f20c3df027fba 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -187,7 +187,9 @@ Performance improvements
 - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`)
 - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`)
 - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
--
+- The internal :meth:`Index._shallow_copy` now copies cached attributes over to the new index,
+  avoiding creating these again on the new index. This can speed up many operations
+  that depend on creating copies of existing indexes (:issue:`28584`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 3eab757311ccb..17a1827fda027 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1,7 +1,7 @@
 from datetime import datetime
 import operator
 from textwrap import dedent
-from typing import TYPE_CHECKING, Any, FrozenSet, Hashable, Union
+from typing import TYPE_CHECKING, Any, Dict, FrozenSet, Hashable, Union
 import warnings
 
 import numpy as np
@@ -250,6 +250,7 @@ def _outer_indexer(self, left, right):
 
     _typ = "index"
     _data: Union[ExtensionArray, np.ndarray]
+    _cache: Dict[str, Any]
     _id = None
     _name: Label = None
     # MultiIndex.levels previously allowed setting the index name. We
@@ -468,6 +469,7 @@ def _simple_new(cls, values, name: Label = None):
         # we actually set this value too.
         result._index_data = values
         result._name = name
+        result._cache = {}
 
         return result._reset_identity()
 
@@ -498,11 +500,13 @@ def _shallow_copy(self, values=None, name: Label = no_default):
         name : Label, defaults to self.name
         """
         name = self.name if name is no_default else name
-
+        cache = self._cache.copy() if values is None else {}
         if values is None:
             values = self.values
 
-        return self._simple_new(values, name=name)
+        result = self._simple_new(values, name=name)
+        result._cache = cache
+        return result
 
     def _shallow_copy_with_infer(self, values, **kwargs):
         """
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index 6c250ccd09a51..2d1d69772c100 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -104,15 +104,11 @@ def _maybe_cast_slice_bound(self, label, side, kind):
 
     @Appender(Index._shallow_copy.__doc__)
     def _shallow_copy(self, values=None, name: Label = lib.no_default):
-        name = name if name is not lib.no_default else self.name
-
         if values is not None and not self._can_hold_na and values.dtype.kind == "f":
+            name = self.name if name is lib.no_default else name
             # Ensure we are not returning an Int64Index with float data:
             return Float64Index._simple_new(values, name=name)
-
-        if values is None:
-            values = self.values
-        return type(self)._simple_new(values, name=name)
+        return super()._shallow_copy(values=values, name=name)
 
     def _convert_for_op(self, value):
         """