Skip to content

Commit 4492979

Browse files
authored
REF: Categorical.is_dtype_equal -> categories_match_up_to_permutation (#37545)
1 parent 4bad8cb commit 4492979

File tree

7 files changed

+50
-28
lines changed

7 files changed

+50
-28
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ Deprecations
340340
- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
341341
- Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
342342
- :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`)
343+
- :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated and will be removed in a future version (:issue:`37545`)
343344

344345
.. ---------------------------------------------------------------------------
345346

pandas/core/arrays/categorical.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def func(self, other):
7878
# the same (maybe up to ordering, depending on ordered)
7979

8080
msg = "Categoricals can only be compared if 'categories' are the same."
81-
if not self.is_dtype_equal(other):
81+
if not self._categories_match_up_to_permutation(other):
8282
raise TypeError(msg)
8383

8484
if not self.ordered and not self.categories.equals(other.categories):
@@ -1869,11 +1869,12 @@ def _validate_setitem_value(self, value):
18691869

18701870
# require identical categories set
18711871
if isinstance(value, Categorical):
1872-
if not is_dtype_equal(self, value):
1872+
if not is_dtype_equal(self.dtype, value.dtype):
18731873
raise ValueError(
18741874
"Cannot set a Categorical with another, "
18751875
"without identical categories"
18761876
)
1877+
# is_dtype_equal implies categories_match_up_to_permutation
18771878
new_codes = self._validate_listlike(value)
18781879
value = Categorical.from_codes(new_codes, dtype=self.dtype)
18791880

@@ -2107,7 +2108,7 @@ def equals(self, other: object) -> bool:
21072108
"""
21082109
if not isinstance(other, Categorical):
21092110
return False
2110-
elif self.is_dtype_equal(other):
2111+
elif self._categories_match_up_to_permutation(other):
21112112
other_codes = self._validate_listlike(other)
21122113
return np.array_equal(self._codes, other_codes)
21132114
return False
@@ -2120,7 +2121,7 @@ def _concat_same_type(self, to_concat):
21202121

21212122
# ------------------------------------------------------------------
21222123

2123-
def is_dtype_equal(self, other):
2124+
def _categories_match_up_to_permutation(self, other: "Categorical") -> bool:
21242125
"""
21252126
Returns True if categoricals are the same dtype
21262127
same categories (up to permutation), and same ordered flag
@@ -2133,8 +2134,17 @@ def is_dtype_equal(self, other):
21332134
-------
21342135
bool
21352136
"""
2137+
return hash(self.dtype) == hash(other.dtype)
2138+
2139+
def is_dtype_equal(self, other) -> bool:
2140+
warn(
2141+
"Categorical.is_dtype_equal is deprecated and will be removed "
2142+
"in a future version",
2143+
FutureWarning,
2144+
stacklevel=2,
2145+
)
21362146
try:
2137-
return hash(self.dtype) == hash(other.dtype)
2147+
return self._categories_match_up_to_permutation(other)
21382148
except (AttributeError, TypeError):
21392149
return False
21402150

pandas/core/dtypes/concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def _maybe_unwrap(x):
296296
raise TypeError("dtype of categories must be the same")
297297

298298
ordered = False
299-
if all(first.is_dtype_equal(other) for other in to_union[1:]):
299+
if all(first._categories_match_up_to_permutation(other) for other in to_union[1:]):
300300
# identical categories - fastpath
301301
categories = first.categories
302302
ordered = first.ordered

pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ def _is_dtype_compat(self, other) -> Categorical:
255255
"""
256256
if is_categorical_dtype(other):
257257
other = extract_array(other)
258-
if not other.is_dtype_equal(self):
258+
if not other._categories_match_up_to_permutation(self):
259259
raise TypeError(
260260
"categories must match existing categories when appending"
261261
)

pandas/core/reshape/merge.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1083,7 +1083,7 @@ def _maybe_coerce_merge_keys(self):
10831083
# if either left or right is a categorical
10841084
# then they must match exactly in categories & ordered
10851085
if lk_is_cat and rk_is_cat:
1086-
if lk.is_dtype_equal(rk):
1086+
if lk._categories_match_up_to_permutation(rk):
10871087
continue
10881088

10891089
elif lk_is_cat or rk_is_cat:

pandas/tests/arrays/categorical/test_dtypes.py

+28-17
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,45 @@
88

99

1010
class TestCategoricalDtypes:
11-
def test_is_equal_dtype(self):
11+
def test_is_dtype_equal_deprecated(self):
12+
# GH#37545
13+
c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
14+
15+
with tm.assert_produces_warning(FutureWarning):
16+
c1.is_dtype_equal(c1)
17+
18+
def test_categories_match_up_to_permutation(self):
1219

1320
# test dtype comparisons between cats
1421

1522
c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
1623
c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
1724
c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
18-
assert c1.is_dtype_equal(c1)
19-
assert c2.is_dtype_equal(c2)
20-
assert c3.is_dtype_equal(c3)
21-
assert c1.is_dtype_equal(c2)
22-
assert not c1.is_dtype_equal(c3)
23-
assert not c1.is_dtype_equal(Index(list("aabca")))
24-
assert not c1.is_dtype_equal(c1.astype(object))
25-
assert c1.is_dtype_equal(CategoricalIndex(c1))
26-
assert c1.is_dtype_equal(CategoricalIndex(c1, categories=list("cab")))
27-
assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))
25+
assert c1._categories_match_up_to_permutation(c1)
26+
assert c2._categories_match_up_to_permutation(c2)
27+
assert c3._categories_match_up_to_permutation(c3)
28+
assert c1._categories_match_up_to_permutation(c2)
29+
assert not c1._categories_match_up_to_permutation(c3)
30+
assert not c1._categories_match_up_to_permutation(Index(list("aabca")))
31+
assert not c1._categories_match_up_to_permutation(c1.astype(object))
32+
assert c1._categories_match_up_to_permutation(CategoricalIndex(c1))
33+
assert c1._categories_match_up_to_permutation(
34+
CategoricalIndex(c1, categories=list("cab"))
35+
)
36+
assert not c1._categories_match_up_to_permutation(
37+
CategoricalIndex(c1, ordered=True)
38+
)
2839

2940
# GH 16659
3041
s1 = Series(c1)
3142
s2 = Series(c2)
3243
s3 = Series(c3)
33-
assert c1.is_dtype_equal(s1)
34-
assert c2.is_dtype_equal(s2)
35-
assert c3.is_dtype_equal(s3)
36-
assert c1.is_dtype_equal(s2)
37-
assert not c1.is_dtype_equal(s3)
38-
assert not c1.is_dtype_equal(s1.astype(object))
44+
assert c1._categories_match_up_to_permutation(s1)
45+
assert c2._categories_match_up_to_permutation(s2)
46+
assert c3._categories_match_up_to_permutation(s3)
47+
assert c1._categories_match_up_to_permutation(s2)
48+
assert not c1._categories_match_up_to_permutation(s3)
49+
assert not c1._categories_match_up_to_permutation(s1.astype(object))
3950

4051
def test_set_dtype_same(self):
4152
c = Categorical(["a", "b", "c"])

pandas/tests/reshape/merge/test_merge.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1707,8 +1707,8 @@ def test_other_columns(self, left, right):
17071707
tm.assert_series_equal(result, expected)
17081708

17091709
# categories are preserved
1710-
assert left.X.values.is_dtype_equal(merged.X.values)
1711-
assert right.Z.values.is_dtype_equal(merged.Z.values)
1710+
assert left.X.values._categories_match_up_to_permutation(merged.X.values)
1711+
assert right.Z.values._categories_match_up_to_permutation(merged.Z.values)
17121712

17131713
@pytest.mark.parametrize(
17141714
"change",
@@ -1725,7 +1725,7 @@ def test_dtype_on_merged_different(self, change, join_type, left, right):
17251725
X = change(right.X.astype("object"))
17261726
right = right.assign(X=X)
17271727
assert is_categorical_dtype(left.X.values.dtype)
1728-
# assert not left.X.values.is_dtype_equal(right.X.values)
1728+
# assert not left.X.values._categories_match_up_to_permutation(right.X.values)
17291729

17301730
merged = pd.merge(left, right, on="X", how=join_type)
17311731

0 commit comments

Comments
 (0)