diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index f4bb8c580fb08..8f04032e1ca09 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -28,6 +28,10 @@ Fixed regressions Bug fixes ~~~~~~~~~ +**Categorical** + +- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`) + **I/O** - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d26ff7490e714..0e04354ae7c89 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -644,7 +644,13 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): ) raise ValueError(msg) - codes = np.asarray(codes) # #21767 + if is_extension_array_dtype(codes) and is_integer_dtype(codes): + # Avoid the implicit conversion of Int to object + if isna(codes).any(): + raise ValueError("codes cannot contain NA values") + codes = codes.to_numpy(dtype=np.int64) + else: + codes = np.asarray(codes) if len(codes) and not is_integer_dtype(codes): raise ValueError("codes need to be array-like integers") diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 70e1421c8dcf4..dbd8fd8df67c1 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -560,6 +560,23 @@ def test_from_codes_neither(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([0, 1]) + def test_from_codes_with_nullable_int(self): + codes = pd.array([0, 1], dtype="Int64") + categories = ["a", "b"] + + result = Categorical.from_codes(codes, categories=categories) + expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_with_nullable_int_na_raises(self): + codes = pd.array([0, None], dtype="Int64") + categories = ["a", "b"] + + msg = "codes cannot contain NA values" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, categories=categories) + @pytest.mark.parametrize("dtype", [None, "category"]) def test_from_inferred_categories(self, dtype): cats = ["a", "b"]