From 7cad4f81c492bb87db772c480901e143c2c06870 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 13 Sep 2017 11:50:24 -0500 Subject: [PATCH] PERF: Faster CategoricalIndex from categorical --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/category.py | 4 ++++ pandas/tests/indexes/test_category.py | 10 ++++++++++ 3 files changed, 15 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6495ad3e7f6ad..52e056103cbdc 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -469,6 +469,7 @@ Performance Improvements - :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) - Improved performance of :meth:`Categorical.set_categories` by not materializing the values (:issue:`17508`) - :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) +- Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`) .. _whatsnew_0210.bug_fixes: diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 71cd4790ac364..ef1dc4d971f37 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -130,6 +130,10 @@ def _create_categorical(self, data, categories=None, ordered=None): ------- Categorical """ + if (isinstance(data, (ABCSeries, type(self))) and + is_categorical_dtype(data)): + data = data.values + if not isinstance(data, ABCCategorical): ordered = False if ordered is None else ordered from pandas.core.categorical import Categorical diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index aac68ebd6abed..cf365465763fa 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -125,6 +125,16 @@ def test_construction_with_dtype(self): result = CategoricalIndex(idx, categories=idx, ordered=True) tm.assert_index_equal(result, expected, exact=True) + def test_create_categorical(self): + # https://github.com/pandas-dev/pandas/pull/17513 + # The public CI constructor doesn't hit this code path with + # instances of CategoricalIndex, but we still want to test the code + ci = CategoricalIndex(['a', 'b', 'c']) + # First ci is self, second ci is data. + result = CategoricalIndex._create_categorical(ci, ci) + expected = Categorical(['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + def test_disallow_set_ops(self): # GH 10039