Skip to content

Commit 16684f2

Browse files
authored
BUG: Avoid casting Int to object in Categorical.from_codes (#31794)
1 parent 634a41f commit 16684f2

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

doc/source/whatsnew/v1.0.2.rst

+4
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,10 @@ Fixed regressions
2828
Bug fixes
2929
~~~~~~~~~
3030

31+
**Categorical**
32+
33+
- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`)
34+
3135
**I/O**
3236

3337
- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)

pandas/core/arrays/categorical.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -644,7 +644,13 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
644644
)
645645
raise ValueError(msg)
646646

647-
codes = np.asarray(codes) # #21767
647+
if is_extension_array_dtype(codes) and is_integer_dtype(codes):
648+
# Avoid the implicit conversion of Int to object
649+
if isna(codes).any():
650+
raise ValueError("codes cannot contain NA values")
651+
codes = codes.to_numpy(dtype=np.int64)
652+
else:
653+
codes = np.asarray(codes)
648654
if len(codes) and not is_integer_dtype(codes):
649655
raise ValueError("codes need to be array-like integers")
650656

pandas/tests/arrays/categorical/test_constructors.py

+17
Original file line number | Diff line number | Diff line change
@@ -560,6 +560,23 @@ def test_from_codes_neither(self):
560560
with pytest.raises(ValueError, match=msg):
561561
Categorical.from_codes([0, 1])
562562

563+
def test_from_codes_with_nullable_int(self):
564+
codes = pd.array([0, 1], dtype="Int64")
565+
categories = ["a", "b"]
566+
567+
result = Categorical.from_codes(codes, categories=categories)
568+
expected = Categorical.from_codes(codes.to_numpy(int), categories=categories)
569+
570+
tm.assert_categorical_equal(result, expected)
571+
572+
def test_from_codes_with_nullable_int_na_raises(self):
573+
codes = pd.array([0, None], dtype="Int64")
574+
categories = ["a", "b"]
575+
576+
msg = "codes cannot contain NA values"
577+
with pytest.raises(ValueError, match=msg):
578+
Categorical.from_codes(codes, categories=categories)
579+
563580
@pytest.mark.parametrize("dtype", [None, "category"])
564581
def test_from_inferred_categories(self, dtype):
565582
cats = ["a", "b"]

0 commit comments

Comments
 (0)