diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 4c9817f3c3dc6..afd6bbb6c57e0 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -51,6 +51,7 @@
     ABCExtensionArray,
     ABCIndexClass,
     ABCMultiIndex,
+    ABCRangeIndex,
     ABCSeries,
 )
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
@@ -682,7 +683,9 @@ def factorize(
         na_sentinel = -1
         dropna = False
 
-    if is_extension_array_dtype(values.dtype):
+    if isinstance(values, ABCRangeIndex):
+        return values.factorize(sort=sort)
+    elif is_extension_array_dtype(values.dtype):
         values = extract_array(values)
         codes, uniques = values.factorize(na_sentinel=na_sentinel)
         dtype = original.dtype
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index 8e12f84895361..3222f44dd4415 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -1,7 +1,7 @@
 from datetime import timedelta
 import operator
 from sys import getsizeof
-from typing import Any, List
+from typing import Any, List, Optional, Tuple
 import warnings
 
 import numpy as np
@@ -461,6 +461,36 @@ def argsort(self, *args, **kwargs) -> np.ndarray:
         else:
             return np.arange(len(self) - 1, -1, -1)
 
+    def factorize(
+        self, sort: bool = False, na_sentinel: Optional[int] = -1
+    ) -> Tuple[np.ndarray, "RangeIndex"]:
+        """
+        Encode the index as integer codes and its unique values.
+
+        Every element of the index is treated as its own unique value: the
+        codes are ``0 .. len(self) - 1`` and the uniques are the index itself.
+
+        Parameters
+        ----------
+        sort : bool, default False
+            If True and ``step`` is negative, both the codes and the
+            uniques are reversed so the uniques come back ascending.
+        na_sentinel : int or None, default -1
+            Not used by this implementation.
+
+        Returns
+        -------
+        codes : np.ndarray
+            Positions with dtype ``np.intp``.
+        uniques : RangeIndex
+        """
+        codes = np.arange(len(self), dtype=np.intp)
+        uniques = self
+        if sort and self.step < 0:
+            codes = codes[::-1]
+            uniques = uniques[::-1]
+        return codes, uniques
+
     def equals(self, other: object) -> bool:
         """
         Determines if two Index objects contain the same elements.
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index dab7fc51f2537..753c15bde6bba 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -290,6 +290,16 @@ def test_constructor_with_generator(self):
         cat = Categorical([0, 1, 2], categories=range(3))
         tm.assert_categorical_equal(cat, exp)
 
+    def test_constructor_with_rangeindex(self):
+        # RangeIndex is preserved as the categories
+        rng = Index(range(3))
+
+        cat = Categorical(rng)
+        tm.assert_index_equal(cat.categories, rng, exact=True)
+
+        cat = Categorical([1, 2, 0], categories=rng)
+        tm.assert_index_equal(cat.categories, rng, exact=True)
+
     @pytest.mark.parametrize(
         "dtl",
         [
diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py
index 85f3d17fdd0d4..ca6387938d747 100644
--- a/pandas/tests/indexes/multi/test_constructors.py
+++ b/pandas/tests/indexes/multi/test_constructors.py
@@ -477,6 +477,14 @@ def test_from_product_datetimeindex():
     tm.assert_numpy_array_equal(mi.values, etalon)
 
 
+def test_from_product_rangeindex():
+    # RangeIndex is preserved by factorize, so preserved in levels
+    rng = Index(range(5))
+    other = ["a", "b"]
+    mi = MultiIndex.from_product([rng, other])
+    tm.assert_index_equal(mi._levels[0], rng, exact=True)
+
+
 @pytest.mark.parametrize("ordered", [False, True])
 @pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
 def test_from_product_index_series_categorical(ordered, f):
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 89d0a6723c890..d836ca7a53249 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -307,6 +307,37 @@ def test_datetime64_factorize(self, writable):
         tm.assert_numpy_array_equal(codes, expected_codes)
         tm.assert_numpy_array_equal(uniques, expected_uniques)
 
+    @pytest.mark.parametrize("sort", [True, False])
+    def test_factorize_rangeindex(self, sort):
+        # increasing -> sort doesn't matter
+        ri = pd.RangeIndex.from_range(range(10))
+        expected = np.arange(10, dtype=np.intp), ri
+
+        result = algos.factorize(ri, sort=sort)
+        tm.assert_numpy_array_equal(result[0], expected[0])
+        tm.assert_index_equal(result[1], expected[1], exact=True)
+
+        result = ri.factorize(sort=sort)
+        tm.assert_numpy_array_equal(result[0], expected[0])
+        tm.assert_index_equal(result[1], expected[1], exact=True)
+
+    @pytest.mark.parametrize("sort", [True, False])
+    def test_factorize_rangeindex_decreasing(self, sort):
+        # decreasing -> sort matters
+        ri = pd.RangeIndex.from_range(range(10))[::-1]
+        expected = np.arange(10, dtype=np.intp), ri
+        if sort:
+            # sorted uniques are ascending, so codes and uniques both flip
+            expected = expected[0][::-1], expected[1][::-1]
+
+        result = algos.factorize(ri, sort=sort)
+        tm.assert_numpy_array_equal(result[0], expected[0])
+        tm.assert_index_equal(result[1], expected[1], exact=True)
+
+        result = ri.factorize(sort=sort)
+        tm.assert_numpy_array_equal(result[0], expected[0])
+        tm.assert_index_equal(result[1], expected[1], exact=True)
+
     def test_deprecate_order(self):
         # gh 19727 - check warning is raised for deprecated keyword, order.
         # Test not valid once order keyword is removed.