From 951ab2b2ec3de8e24f1d053de551d89b059eda22 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Dec 2020 13:20:11 -0800 Subject: [PATCH 1/2] BUG: disallow scalar in Categorical constructor --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/arrays/categorical.py | 4 ++++ .../tests/arrays/categorical/test_constructors.py | 15 +++++++++------ pandas/tests/extension/test_categorical.py | 2 +- pandas/tests/series/methods/test_replace.py | 2 +- pandas/tests/test_algos.py | 8 ++++---- 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 020d3091929db..2535db6b58987 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -149,7 +149,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - +- Bug in :class:`Categorical` constructor incorrectly accepting scalar values (:issue:`38433`) - - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 27110fe1f8439..93f734374e7a2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -313,6 +313,10 @@ def __init__( self._dtype = self._dtype.update_dtype(dtype) return + if not is_list_like(values): + # GH#38433 + raise TypeError("Categorical values must be list-like") + # null_mask indicates missing values we want to exclude from inference. # This means: only missing values in list-likes (not arrays/ndframes). 
null_mask = np.array(False) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 753c15bde6bba..c7e3cbe7115b4 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -26,6 +26,11 @@ class TestCategoricalConstructors: + def test_categorical_scalar_raises(self): + msg = "Categorical values must be list-like" + with pytest.raises(TypeError, match=msg): + Categorical("A", categories=["A", "B"]) + def test_validate_ordered(self): # see gh-14058 exp_msg = "'ordered' must either be 'True' or 'False'" @@ -202,12 +207,10 @@ def test_constructor(self): assert len(cat.codes) == 1 assert cat.codes[0] == 0 - # Scalars should be converted to lists - cat = Categorical(1) - assert len(cat.categories) == 1 - assert cat.categories[0] == 1 - assert len(cat.codes) == 1 - assert cat.codes[0] == 0 + # GH#38433 Scalars should be not converted to lists + msg = "Categorical values must be list-like" + with pytest.raises(TypeError, match=msg): + Categorical(1) # two arrays # - when the first is an integer dtype and the second is not diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index d03a9ab6b2588..520cfdcbd2f98 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -221,7 +221,7 @@ def test_cast_category_to_extension_dtype(self, expected): ) def test_consistent_casting(self, dtype, expected): # GH 28448 - result = Categorical("2015-01-01").astype(dtype) + result = Categorical(["2015-01-01"]).astype(dtype) assert result == expected diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 6db226eb14a22..d2d564be88942 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -290,7 +290,7 @@ def test_replace_mixed_types_with_string(self): 
@pytest.mark.parametrize( "categorical, numeric", [ - (pd.Categorical("A", categories=["A", "B"]), [1]), + (pd.Categorical(["A"], categories=["A", "B"]), [1]), (pd.Categorical(("A",), categories=["A", "B"]), [1]), (pd.Categorical(("A", "B"), categories=["A", "B"]), [1, 2]), ], diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 35411d7e9cfb7..4aca967d71111 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -908,8 +908,8 @@ def test_categorical_from_codes(self): # GH 16639 vals = np.array([0, 1, 2, 0]) cats = ["a", "b", "c"] - Sd = Series(Categorical(1).from_codes(vals, cats)) - St = Series(Categorical(1).from_codes(np.array([0, 1]), cats)) + Sd = Series(Categorical([1]).from_codes(vals, cats)) + St = Series(Categorical([1]).from_codes(np.array([0, 1]), cats)) expected = np.array([True, True, False, True]) result = algos.isin(Sd, St) tm.assert_numpy_array_equal(expected, result) @@ -917,8 +917,8 @@ def test_categorical_from_codes(self): def test_categorical_isin(self): vals = np.array([0, 1, 2, 0]) cats = ["a", "b", "c"] - cat = Categorical(1).from_codes(vals, cats) - other = Categorical(1).from_codes(np.array([0, 1]), cats) + cat = Categorical([1]).from_codes(vals, cats) + other = Categorical([1]).from_codes(np.array([0, 1]), cats) expected = np.array([True, True, False, True]) result = algos.isin(cat, other) From 6baded31d39c5f53450e994bab831802df56a556 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Dec 2020 13:04:16 -0800 Subject: [PATCH 2/2] deprecate --- doc/source/whatsnew/v1.3.0.rst | 3 +-- pandas/core/arrays/categorical.py | 8 +++++++- .../arrays/categorical/test_constructors.py | 18 ++++++++++-------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 79c6ddd8b85ba..a9af4174840e5 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -143,7 +143,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - 
+- Deprecated allowing scalars in the :class:`Categorical` constructor (:issue:`38433`) - - @@ -171,7 +171,6 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`) -- Bug in :class:`Categorical` constructor incorrectly accepting scalar values (:issue:`38433`) - Datetimelike diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index fceb3803d60d1..e61a4e1f70114 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -322,7 +322,13 @@ def __init__( if not is_list_like(values): # GH#38433 - raise TypeError("Categorical values must be list-like") + warn( + "Allowing scalars in the Categorical constructor is deprecated " + "and will raise in a future version. Use `[value]` instead", + FutureWarning, + stacklevel=2, + ) + values = [values] # null_mask indicates missing values we want to exclude from inference. # This means: only missing values in list-likes (not arrays/ndframes). 
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index e2d6920a41b18..8459ad6f8e282 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -26,9 +26,9 @@ class TestCategoricalConstructors: - def test_categorical_scalar_raises(self): - msg = "Categorical values must be list-like" - with pytest.raises(TypeError, match=msg): + def test_categorical_scalar_deprecated(self): + # GH#38433 + with tm.assert_produces_warning(FutureWarning): Categorical("A", categories=["A", "B"]) def test_validate_ordered(self): @@ -207,11 +207,13 @@ def test_constructor(self): assert len(cat.codes) == 1 assert cat.codes[0] == 0 - # GH#38433 Scalars should be not converted to lists - msg = "Categorical values must be list-like" - with pytest.raises(TypeError, match=msg): - Categorical(1) - + with tm.assert_produces_warning(FutureWarning): + # GH#38433 + cat = Categorical(1) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 # two arrays # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN