Skip to content

Commit d6cb249

Browse files
authored
BUG: RangeIndex.astype('category') (#41263)
1 parent 9806b75 commit d6cb249

File tree

6 files changed

+48
-20
lines changed

6 files changed

+48
-20
lines changed

doc/source/whatsnew/v1.3.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,7 @@ Indexing
780780
- Bug in :meth:`DatetimeIndex.insert` when inserting ``np.datetime64("NaT")`` into a timezone-aware index incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`)
781781
- Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`)
782782
- Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`)
783+
- Bug in :meth:`RangeIndex.astype` where converting to :class:`CategoricalIndex` produced categories that were an :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`41263`)
783784
- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
784785
- Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
785786
- Bug in :meth:`DataFrame.__setitem__` and :meth:`DataFrame.iloc.__setitem__` raising ``ValueError`` when trying to index with a row-slice and setting a list as values (:issue:`40440`)
@@ -945,6 +946,7 @@ Other
945946
- Bug in :meth:`Series.where` with numeric dtype and ``other = None`` not casting to ``nan`` (:issue:`39761`)
946947
- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)
947948
- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
949+
- Bug in :func:`pandas.testing.assert_index_equal` with ``exact=True`` not raising when comparing :class:`CategoricalIndex` instances with ``Int64Index`` and ``RangeIndex`` categories (:issue:`41263`)
948950
- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
949951
- Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
950952
- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raising ``ValueError`` when called on an empty DataFrame (:issue:`40393`)

pandas/_testing/asserters.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -309,18 +309,22 @@ def assert_index_equal(
309309
__tracebackhide__ = True
310310

311311
def _check_types(left, right, obj="Index"):
312-
if exact:
313-
assert_class_equal(left, right, exact=exact, obj=obj)
312+
if not exact:
313+
return
314314

315-
# Skip exact dtype checking when `check_categorical` is False
316-
if check_categorical:
317-
assert_attr_equal("dtype", left, right, obj=obj)
315+
assert_class_equal(left, right, exact=exact, obj=obj)
318316

319-
# allow string-like to have different inferred_types
320-
if left.inferred_type in ("string"):
321-
assert right.inferred_type in ("string")
322-
else:
323-
assert_attr_equal("inferred_type", left, right, obj=obj)
317+
# Skip exact dtype checking when `check_categorical` is False
318+
if check_categorical:
319+
assert_attr_equal("dtype", left, right, obj=obj)
320+
if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype):
321+
assert_index_equal(left.categories, right.categories, exact=exact)
322+
323+
# allow string-like to have different inferred_types
324+
if left.inferred_type in ("string"):
325+
assert right.inferred_type in ("string")
326+
else:
327+
assert_attr_equal("inferred_type", left, right, obj=obj)
324328

325329
def _get_ilevel_values(index, level):
326330
# accept level number only

pandas/core/indexes/base.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -907,9 +907,7 @@ def astype(self, dtype, copy=True):
907907
elif is_categorical_dtype(dtype):
908908
from pandas.core.indexes.category import CategoricalIndex
909909

910-
return CategoricalIndex(
911-
self._values, name=self.name, dtype=dtype, copy=copy
912-
)
910+
return CategoricalIndex(self, name=self.name, dtype=dtype, copy=copy)
913911

914912
elif is_extension_array_dtype(dtype):
915913
return Index(np.asarray(self), name=self.name, dtype=dtype, copy=copy)

pandas/tests/indexes/categorical/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ def test_construction_with_dtype(self):
108108
tm.assert_index_equal(result, ci, exact=True)
109109

110110
# make sure indexes are handled
111-
expected = CategoricalIndex([0, 1, 2], categories=[0, 1, 2], ordered=True)
112111
idx = Index(range(3))
112+
expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
113113
result = CategoricalIndex(idx, categories=idx, ordered=True)
114114
tm.assert_index_equal(result, expected, exact=True)
115115

pandas/tests/indexes/common.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -672,20 +672,20 @@ def test_astype_category(self, copy, name, ordered, simple_index):
672672
# standard categories
673673
dtype = CategoricalDtype(ordered=ordered)
674674
result = idx.astype(dtype, copy=copy)
675-
expected = CategoricalIndex(idx.values, name=name, ordered=ordered)
676-
tm.assert_index_equal(result, expected)
675+
expected = CategoricalIndex(idx, name=name, ordered=ordered)
676+
tm.assert_index_equal(result, expected, exact=True)
677677

678678
# non-standard categories
679679
dtype = CategoricalDtype(idx.unique().tolist()[:-1], ordered)
680680
result = idx.astype(dtype, copy=copy)
681-
expected = CategoricalIndex(idx.values, name=name, dtype=dtype)
682-
tm.assert_index_equal(result, expected)
681+
expected = CategoricalIndex(idx, name=name, dtype=dtype)
682+
tm.assert_index_equal(result, expected, exact=True)
683683

684684
if ordered is False:
685685
# dtype='category' defaults to ordered=False, so only test once
686686
result = idx.astype("category", copy=copy)
687-
expected = CategoricalIndex(idx.values, name=name)
688-
tm.assert_index_equal(result, expected)
687+
expected = CategoricalIndex(idx, name=name)
688+
tm.assert_index_equal(result, expected, exact=True)
689689

690690
def test_is_unique(self, simple_index):
691691
# initialize a unique index

pandas/tests/util/test_assert_index_equal.py

+24
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33

44
from pandas import (
55
Categorical,
6+
CategoricalIndex,
67
Index,
78
MultiIndex,
89
NaT,
10+
RangeIndex,
911
)
1012
import pandas._testing as tm
1113

@@ -199,6 +201,28 @@ def test_index_equal_category_mismatch(check_categorical):
199201
tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical)
200202

201203

204+
@pytest.mark.parametrize("exact", [False, True])
205+
def test_index_equal_range_categories(check_categorical, exact):
206+
# GH41263
207+
msg = """\
208+
Index are different
209+
210+
Index classes are different
211+
\\[left\\]: RangeIndex\\(start=0, stop=10, step=1\\)
212+
\\[right\\]: Int64Index\\(\\[0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\], dtype='int64'\\)"""
213+
214+
rcat = CategoricalIndex(RangeIndex(10))
215+
icat = CategoricalIndex(list(range(10)))
216+
217+
if check_categorical and exact:
218+
with pytest.raises(AssertionError, match=msg):
219+
tm.assert_index_equal(rcat, icat, check_categorical=True, exact=True)
220+
else:
221+
tm.assert_index_equal(
222+
rcat, icat, check_categorical=check_categorical, exact=exact
223+
)
224+
225+
202226
def test_assert_index_equal_mixed_dtype():
203227
# GH#39168
204228
idx = Index(["foo", "bar", 42])

0 commit comments

Comments
 (0)