diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 64b9a11b1980d..15f4a2677bf57 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -144,6 +144,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ +- Deprecated allowing scalars passed to the :class:`Categorical` constructor (:issue:`38433`) - Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`,:issue:`21311`,:issue:`22315`,:issue:`26974`) - - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 002f36f7949e5..940c56340f75e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -320,6 +320,16 @@ def __init__( self._dtype = self._dtype.update_dtype(dtype) return + if not is_list_like(values): + # GH#38433 + warn( + "Allowing scalars in the Categorical constructor is deprecated " + "and will raise in a future version. Use `[value]` instead", + FutureWarning, + stacklevel=2, + ) + values = [values] + # null_mask indicates missing values we want to exclude from inference. # This means: only missing values in list-likes (not arrays/ndframes). 
null_mask = np.array(False) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 59d4700874810..924a20c7e6490 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -26,6 +26,11 @@ class TestCategoricalConstructors: + def test_categorical_scalar_deprecated(self): + # GH#38433 + with tm.assert_produces_warning(FutureWarning): + Categorical("A", categories=["A", "B"]) + def test_validate_ordered(self): # see gh-14058 exp_msg = "'ordered' must either be 'True' or 'False'" @@ -202,13 +207,13 @@ def test_constructor(self): assert len(cat.codes) == 1 assert cat.codes[0] == 0 - # Scalars should be converted to lists - cat = Categorical(1) + with tm.assert_produces_warning(FutureWarning): + # GH#38433 + cat = Categorical(1) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 - # two arrays # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 4a0fb8f81ed56..493cb979494c8 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -223,7 +223,7 @@ def test_cast_category_to_extension_dtype(self, expected): ) def test_consistent_casting(self, dtype, expected): # GH 28448 - result = Categorical("2015-01-01").astype(dtype) + result = Categorical(["2015-01-01"]).astype(dtype) assert result == expected diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 6db226eb14a22..d2d564be88942 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -290,7 +290,7 @@ def test_replace_mixed_types_with_string(self): @pytest.mark.parametrize( "categorical, numeric", [ - 
(pd.Categorical("A", categories=["A", "B"]), [1]), + (pd.Categorical(["A"], categories=["A", "B"]), [1]), (pd.Categorical(("A",), categories=["A", "B"]), [1]), (pd.Categorical(("A", "B"), categories=["A", "B"]), [1, 2]), ], diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 35411d7e9cfb7..4aca967d71111 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -908,8 +908,8 @@ def test_categorical_from_codes(self): # GH 16639 vals = np.array([0, 1, 2, 0]) cats = ["a", "b", "c"] - Sd = Series(Categorical(1).from_codes(vals, cats)) - St = Series(Categorical(1).from_codes(np.array([0, 1]), cats)) + Sd = Series(Categorical([1]).from_codes(vals, cats)) + St = Series(Categorical([1]).from_codes(np.array([0, 1]), cats)) expected = np.array([True, True, False, True]) result = algos.isin(Sd, St) tm.assert_numpy_array_equal(expected, result) @@ -917,8 +917,8 @@ def test_categorical_from_codes(self): def test_categorical_isin(self): vals = np.array([0, 1, 2, 0]) cats = ["a", "b", "c"] - cat = Categorical(1).from_codes(vals, cats) - other = Categorical(1).from_codes(np.array([0, 1]), cats) + cat = Categorical([1]).from_codes(vals, cats) + other = Categorical([1]).from_codes(np.array([0, 1]), cats) expected = np.array([True, True, False, True]) result = algos.isin(cat, other)