diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 0ffd5f881d626..ae1d7029217a4 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -51,6 +51,7 @@ def setup(self): self.values_some_nan = list(np.tile(self.categories + [np.nan], N)) self.values_all_nan = [np.nan] * len(self.values) + self.values_all_int8 = np.ones(N, 'int8') def time_regular(self): pd.Categorical(self.values, self.categories) @@ -70,6 +71,9 @@ def time_with_nan(self): def time_all_nan(self): pd.Categorical(self.values_all_nan) + def time_from_codes_all_int8(self): + pd.Categorical.from_codes(self.values_all_int8, self.categories) + class ValueCounts(object): diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index d10d51352d0e4..99b931db99c2c 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1079,6 +1079,7 @@ Performance Improvements - Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) - Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifiested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) - Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) +- Improved performance of :func:`pandas.core.arrays.Categorical.from_codes` (:issue:`18501`) .. _whatsnew_0230.docs: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f91782459df67..abcb9ae3494b5 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -578,7 +578,7 @@ def from_codes(cls, codes, categories, ordered=False): unordered. """ try: - codes = np.asarray(codes, np.int64) + codes = coerce_indexer_dtype(np.asarray(codes), categories) except (ValueError, TypeError): raise ValueError( "codes need to be convertible to an arrays of integers")