From 59634bf087f8d0c63170caea9420cbf2686e3757 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 25 Oct 2022 19:33:01 -0400 Subject: [PATCH 1/7] deprecate inplace arg in categorical methods --- pandas/core/arrays/categorical.py | 325 +++--------------- .../arrays/categorical/test_analytics.py | 42 --- pandas/tests/arrays/categorical/test_api.py | 87 +---- .../tests/arrays/categorical/test_indexing.py | 19 - 4 files changed, 44 insertions(+), 429 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b3650173e41ef..2e0b3ceb3bd71 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -15,11 +15,7 @@ cast, overload, ) -from warnings import ( - catch_warnings, - simplefilter, - warn, -) +from warnings import warn import numpy as np @@ -31,10 +27,6 @@ lib, ) from pandas._libs.arrays import NDArrayBacked -from pandas._libs.lib import ( - NoDefault, - no_default, -) from pandas._typing import ( ArrayLike, AstypeArg, @@ -729,8 +721,6 @@ def categories(self) -> Index: unique and the number of items in the new categories must be the same as the number of items in the old categories. - Assigning to `categories` is a inplace operation! - Raises ------ ValueError @@ -748,17 +738,6 @@ def categories(self) -> Index: """ return self.dtype.categories - @categories.setter - def categories(self, categories) -> None: - warn( - "Setting categories in-place is deprecated and will raise in a " - "future version. Use rename_categories instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - self._set_categories(categories) - @property def ordered(self) -> Ordered: """ @@ -839,24 +818,7 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Categorical: codes = recode_for_categories(self.codes, self.categories, dtype.categories) return type(self)(codes, dtype=dtype, fastpath=True) - @overload - def set_ordered( - self, value, *, inplace: NoDefault | Literal[False] = ... - ) -> Categorical: - ... - - @overload - def set_ordered(self, value, *, inplace: Literal[True]) -> None: - ... - - @overload - def set_ordered(self, value, *, inplace: bool) -> Categorical | None: - ... - - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) - def set_ordered( - self, value, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def set_ordered(self, value: bool) -> Categorical: """ Set the ordered attribute to the boolean value. @@ -864,98 +826,35 @@ def set_ordered( ---------- value : bool Set whether this categorical is ordered (True) or not (False). - inplace : bool, default False - Whether or not to set the ordered attribute in-place or return - a copy of this categorical with ordered set to the value. - - .. deprecated:: 1.5.0 - """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "set_ordered is deprecated and will be removed in " - "a future version. setting ordered-ness on categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") new_dtype = CategoricalDtype(self.categories, ordered=value) - cat = self if inplace else self.copy() + cat = self.copy() NDArrayBacked.__init__(cat, cat._ndarray, new_dtype) - if not inplace: - return cat - return None - - @overload - def as_ordered(self, *, inplace: NoDefault | Literal[False] = ...) -> Categorical: - ... - - @overload - def as_ordered(self, *, inplace: Literal[True]) -> None: - ... + return cat - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def as_ordered(self, inplace: bool | NoDefault = no_default) -> Categorical | None: + def as_ordered(self) -> Categorical: """ Set the Categorical to be ordered. - Parameters - ---------- - inplace : bool, default False - Whether or not to set the ordered attribute in-place or return - a copy of this categorical with ordered set to True. - - .. deprecated:: 1.5.0 - Returns ------- - Categorical or None - Ordered Categorical or None if ``inplace=True``. + Categorical + Ordered Categorical. """ - if inplace is not no_default: - inplace = validate_bool_kwarg(inplace, "inplace") - return self.set_ordered(True, inplace=inplace) + return self.set_ordered(True) - @overload - def as_unordered(self, *, inplace: NoDefault | Literal[False] = ...) -> Categorical: - ... - - @overload - def as_unordered(self, *, inplace: Literal[True]) -> None: - ... - - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def as_unordered( - self, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def as_unordered(self) -> Categorical: """ Set the Categorical to be unordered. - Parameters - ---------- - inplace : bool, default False - Whether or not to set the ordered attribute in-place or return - a copy of this categorical with ordered set to False. - - .. deprecated:: 1.5.0 - Returns ------- - Categorical or None - Unordered Categorical or None if ``inplace=True``. + Categorical + Unordered Categorical. """ - if inplace is not no_default: - inplace = validate_bool_kwarg(inplace, "inplace") - return self.set_ordered(False, inplace=inplace) + return self.set_ordered(False) - def set_categories( - self, new_categories, ordered=None, rename: bool = False, inplace=no_default - ): + def set_categories(self, new_categories, ordered=None, rename: bool = False): """ Set the categories to the specified new_categories. @@ -985,15 +884,10 @@ def set_categories( rename : bool, default False Whether or not the new_categories should be considered as a rename of the old categories or as reordered categories. - inplace : bool, default False - Whether or not to reorder the categories in-place or return a copy - of this categorical with reordered categories. - - .. deprecated:: 1.3.0 Returns ------- - Categorical with reordered categories or None if inplace. + Categorical with reordered categories. Raises ------ @@ -1008,24 +902,12 @@ def set_categories( remove_categories : Remove the specified categories. remove_unused_categories : Remove categories which are not used. """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "set_categories is deprecated and will be removed in " - "a future version. Removing unused categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - inplace = validate_bool_kwarg(inplace, "inplace") if ordered is None: ordered = self.dtype.ordered new_dtype = CategoricalDtype(new_categories, ordered=ordered) - cat = self if inplace else self.copy() + cat = self.copy() if rename: if cat.dtype.categories is not None and len(new_dtype.categories) < len( cat.dtype.categories @@ -1038,26 +920,9 @@ def set_categories( cat.codes, cat.categories, new_dtype.categories ) NDArrayBacked.__init__(cat, codes, new_dtype) + return cat - if not inplace: - return cat - - @overload - def rename_categories( - self, new_categories, *, inplace: Literal[False] | NoDefault = ... - ) -> Categorical: - ... - - @overload - def rename_categories(self, new_categories, *, inplace: Literal[True]) -> None: - ... - - @deprecate_nonkeyword_arguments( - version=None, allowed_args=["self", "new_categories"] - ) - def rename_categories( - self, new_categories, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def rename_categories(self, new_categories) -> Categorical: """ Rename categories. @@ -1078,16 +943,10 @@ def rename_categories( * callable : a callable that is called on all items in the old categories and whose return values comprise the new categories. - inplace : bool, default False - Whether or not to rename the categories inplace or return a copy of - this categorical with renamed categories. - - .. deprecated:: 1.3.0 - Returns ------- - cat : Categorical or None - Categorical with removed categories or None if ``inplace=True``. + cat : Categorical + Categorical with renamed categories. Raises ------ @@ -1123,32 +982,19 @@ def rename_categories( ['A', 'A', 'B'] Categories (2, object): ['A', 'B'] """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "rename_categories is deprecated and will be removed in " - "a future version. Removing unused categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") - cat = self if inplace else self.copy() if is_dict_like(new_categories): - new_categories = [new_categories.get(item, item) for item in cat.categories] + new_categories = [ + new_categories.get(item, item) for item in self.categories + ] elif callable(new_categories): - new_categories = [new_categories(item) for item in cat.categories] + new_categories = [new_categories(item) for item in self.categories] + cat = self.copy() cat._set_categories(new_categories) - if not inplace: - return cat - return None + return cat - def reorder_categories(self, new_categories, ordered=None, inplace=no_default): + def reorder_categories(self, new_categories, ordered=None): """ Reorder categories as specified in new_categories. @@ -1162,16 +1008,11 @@ def reorder_categories(self, new_categories, ordered=None, inplace=no_default): ordered : bool, optional Whether or not the categorical is treated as a ordered categorical. If not given, do not change the ordered information. - inplace : bool, default False - Whether or not to reorder the categories inplace or return a copy of - this categorical with reordered categories. - - .. deprecated:: 1.3.0 Returns ------- - cat : Categorical or None - Categorical with removed categories or None if ``inplace=True``. + cat : Categorical + Categorical with reordered categories. Raises ------ @@ -1187,44 +1028,13 @@ def reorder_categories(self, new_categories, ordered=None, inplace=no_default): remove_unused_categories : Remove categories which are not used. set_categories : Set the categories to the specified ones. """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "reorder_categories is deprecated and will be removed in " - "a future version. Reordering categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") if set(self.dtype.categories) != set(new_categories): raise ValueError( "items in new_categories are not the same as in old categories" ) + return self.set_categories(new_categories, ordered=ordered) - with catch_warnings(): - simplefilter("ignore") - return self.set_categories(new_categories, ordered=ordered, inplace=inplace) - - @overload - def add_categories( - self, new_categories, *, inplace: Literal[False] | NoDefault = ... - ) -> Categorical: - ... - - @overload - def add_categories(self, new_categories, *, inplace: Literal[True]) -> None: - ... - - @deprecate_nonkeyword_arguments( - version=None, allowed_args=["self", "new_categories"] - ) - def add_categories( - self, new_categories, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def add_categories(self, new_categories) -> Categorical: """ Add new categories. @@ -1235,16 +1045,11 @@ def add_categories( ---------- new_categories : category or list-like of category The new categories to be included. - inplace : bool, default False - Whether or not to add the categories inplace or return a copy of - this categorical with added categories. - - .. deprecated:: 1.3.0 Returns ------- - cat : Categorical or None - Categorical with new categories added or None if ``inplace=True``. + cat : Categorical + Categorical with new categories added. Raises ------ @@ -1271,19 +1076,7 @@ def add_categories( ['c', 'b', 'c'] Categories (4, object): ['b', 'c', 'd', 'a'] """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "add_categories is deprecated and will be removed in " - "a future version. Removing unused categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - inplace = validate_bool_kwarg(inplace, "inplace") if not is_list_like(new_categories): new_categories = [new_categories] already_included = set(new_categories) & set(self.dtype.categories) @@ -1305,15 +1098,12 @@ def add_categories( new_categories = list(self.dtype.categories) + list(new_categories) new_dtype = CategoricalDtype(new_categories, self.ordered) - - cat = self if inplace else self.copy() + cat = self.copy() codes = coerce_indexer_dtype(cat._ndarray, new_dtype.categories) NDArrayBacked.__init__(cat, codes, new_dtype) - if not inplace: - return cat - return None + return cat - def remove_categories(self, removals, inplace=no_default): + def remove_categories(self, removals): """ Remove the specified categories. @@ -1324,16 +1114,11 @@ def remove_categories(self, removals, inplace=no_default): ---------- removals : category or list of categories The categories which should be removed. - inplace : bool, default False - Whether or not to remove the categories inplace or return a copy of - this categorical with removed categories. - - .. deprecated:: 1.3.0 Returns ------- - cat : Categorical or None - Categorical with removed categories or None if ``inplace=True``. + cat : Categorical + Categorical with removed categories. Raises ------ @@ -1359,19 +1144,6 @@ def remove_categories(self, removals, inplace=no_default): [NaN, 'c', 'b', 'c', NaN] Categories (2, object): ['b', 'c'] """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "remove_categories is deprecated and will be removed in " - "a future version. Removing unused categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") if not is_list_like(removals): removals = [removals] @@ -1387,11 +1159,7 @@ def remove_categories(self, removals, inplace=no_default): if len(not_included) != 0: raise ValueError(f"removals must all be in old categories: {not_included}") - with catch_warnings(): - simplefilter("ignore") - return self.set_categories( - new_categories, ordered=self.ordered, rename=False, inplace=inplace - ) + return self.set_categories(new_categories, ordered=self.ordered, rename=False) def remove_unused_categories(self) -> Categorical: """ @@ -2536,9 +2304,7 @@ def _replace(self, *, to_replace, value, inplace: bool = False): continue if replace_value in cat.categories: if isna(new_value): - with catch_warnings(): - simplefilter("ignore") - cat.remove_categories(replace_value, inplace=True) + cat = cat.remove_categories(replace_value) continue categories = cat.categories.tolist() @@ -2547,16 +2313,13 @@ def _replace(self, *, to_replace, value, inplace: bool = False): if new_value in cat.categories: value_index = categories.index(new_value) cat._codes[cat._codes == index] = value_index - with catch_warnings(): - simplefilter("ignore") - cat.remove_categories(replace_value, inplace=True) + cat = cat.remove_categories(replace_value) else: categories[index] = new_value - with catch_warnings(): - simplefilter("ignore") - cat.rename_categories(categories, inplace=True) + cat = cat.rename_categories(categories) if not inplace: return cat + NDArrayBacked.__init__(self, cat._ndarray, cat.dtype) # ------------------------------------------------------------------------ # String methods interface @@ -2604,10 +2367,6 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): """ Accessor object for categorical properties of the Series values. - Be aware that assigning to `categories` is a inplace operation, while all - methods return new categorical data per default (but can be called with - `inplace=True`). - Parameters ---------- data : Series or CategoricalIndex diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 2723b838c41a2..55d39cf84eb30 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -320,48 +320,6 @@ def test_validate_inplace_raises(self, value): 'For argument "inplace" expected type bool, ' f"received type {type(value).__name__}" ) - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match="Use rename_categories" - ): - cat.set_ordered(value=True, inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match="Use rename_categories" - ): - cat.as_ordered(inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match="Use rename_categories" - ): - cat.as_unordered(inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.set_categories(["X", "Y", "Z"], rename=True, inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.rename_categories(["X", "Y", "Z"], inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.reorder_categories(["X", "Y", "Z"], ordered=True, inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.add_categories(new_categories=["D", "E", "F"], inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.remove_categories(removals=["D", "E", "F"], inplace=value) with pytest.raises(ValueError, match=msg): cat.sort_values(inplace=value) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 2f13d4ee0dd40..450581f89d735 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -37,31 +37,14 @@ def test_ordered_api(self): assert cat4.ordered def test_set_ordered(self): - msg = ( - "The `inplace` parameter in pandas.Categorical.set_ordered is " - "deprecated and will be removed in a future version. setting " - "ordered-ness on categories will always return a new Categorical object" - ) cat = Categorical(["a", "b", "c", "a"], ordered=True) cat2 = cat.as_unordered() assert not cat2.ordered cat2 = cat.as_ordered() assert cat2.ordered - with tm.assert_produces_warning(FutureWarning, match=msg): - cat2.as_unordered(inplace=True) - assert not cat2.ordered - with tm.assert_produces_warning(FutureWarning, match=msg): - cat2.as_ordered(inplace=True) - assert cat2.ordered assert cat2.set_ordered(True).ordered assert not cat2.set_ordered(False).ordered - with tm.assert_produces_warning(FutureWarning, match=msg): - cat2.set_ordered(True, inplace=True) - assert cat2.ordered - with tm.assert_produces_warning(FutureWarning, match=msg): - cat2.set_ordered(False, inplace=True) - assert not cat2.ordered # removed in 0.19.0 msg = ( @@ -95,17 +78,6 @@ def test_rename_categories(self): expected = Categorical(["A", "B", "C", "A"]) tm.assert_categorical_equal(result, expected) - # and now inplace - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.rename_categories([1, 2, 3], inplace=True) - - assert res is None - tm.assert_numpy_array_equal( - cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) - ) - tm.assert_index_equal(cat.categories, Index([1, 2, 3])) - @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) def test_rename_categories_wrong_length_raises(self, new_categories): cat = Categorical(["a", "b", "c", "a"]) @@ -130,14 +102,6 @@ def test_rename_categories_dict(self): expected = Index([4, 3, 2, 1]) tm.assert_index_equal(res.categories, expected) - # Test for inplace - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}, inplace=True) - - assert res is None - tm.assert_index_equal(cat.categories, expected) - # Test for dicts of smaller length cat = Categorical(["a", "b", "c", "d"]) res = cat.rename_categories({"a": 1, "c": 3}) @@ -165,21 +129,12 @@ def test_reorder_categories(self): ["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True ) - # first inplace == False res = cat.reorder_categories(["c", "b", "a"]) # cat must be the same as before tm.assert_categorical_equal(cat, old) # only res is changed tm.assert_categorical_equal(res, new) - # inplace == True - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.reorder_categories(["c", "b", "a"], inplace=True) - - assert res is None - tm.assert_categorical_equal(cat, new) - @pytest.mark.parametrize( "new_categories", [ @@ -201,7 +156,6 @@ def test_add_categories(self): ["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True ) - # first inplace == False res = cat.add_categories("d") tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) @@ -210,14 +164,6 @@ def test_add_categories(self): tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) - # inplace == True - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.add_categories("d", inplace=True) - - tm.assert_categorical_equal(cat, new) - assert res is None - # GH 9927 cat = Categorical(list("abc"), ordered=True) expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) @@ -262,14 +208,7 @@ def test_set_categories(self): exp_categories = Index(["c", "b", "a"]) exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.set_categories(["c", "b", "a"], inplace=True) - - tm.assert_index_equal(cat.categories, exp_categories) - tm.assert_numpy_array_equal(cat.__array__(), exp_values) - assert res is None - + cat = cat.set_categories(["c", "b", "a"]) res = cat.set_categories(["a", "b", "c"]) # cat must be the same as before tm.assert_index_equal(cat.categories, exp_categories) @@ -386,7 +325,6 @@ def test_remove_categories(self): old = cat.copy() new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True) - # first inplace == False res = cat.remove_categories("c") tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) @@ -395,14 +333,6 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) - # inplace == True - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.remove_categories("c", inplace=True) - - tm.assert_categorical_equal(cat, new) - assert res is None - @pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]]) def test_remove_categories_raises(self, removals): cat = Categorical(["a", "b", "a"]) @@ -462,11 +392,7 @@ def test_describe(self, factor): # check unused categories cat = factor.copy() - - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.set_categories(["a", "b", "c", "d"], inplace=True) - + cat = cat.set_categories(["a", "b", "c", "d"]) desc = cat.describe() exp_index = CategoricalIndex( @@ -500,15 +426,6 @@ def test_describe(self, factor): ) tm.assert_frame_equal(desc, expected) - def test_set_categories_inplace(self, factor): - cat = factor.copy() - - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.set_categories(["a", "b", "c", "d"], inplace=True) - - tm.assert_index_equal(cat.categories, Index(["a", "b", "c", "d"])) - class TestPrivateCategoricalAPI: def test_codes_immutable(self): diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 94e966642b925..6571b4d0ce4a8 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -191,14 +191,6 @@ def test_periodindex(self): tm.assert_numpy_array_equal(cat3._codes, exp_arr) tm.assert_index_equal(cat3.categories, exp_idx) - def test_categories_assignments(self): - cat = Categorical(["a", "b", "c", "a"]) - exp = np.array([1, 2, 3, 1], dtype=np.int64) - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - cat.categories = [1, 2, 3] - tm.assert_numpy_array_equal(cat.__array__(), exp) - tm.assert_index_equal(cat.categories, Index([1, 2, 3])) - @pytest.mark.parametrize( "null_val", [None, np.nan, NaT, NA, math.nan, "NaT", "nat", "NAT", "nan", "NaN", "NAN"], @@ -210,17 +202,6 @@ def test_periodindex_on_null_types(self, null_val): assert result[2] is NaT tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) - def test_categories_assignments_wrong_length_raises(self, new_categories): - cat = Categorical(["a", "b", "c", "a"]) - msg = ( - "new categories need to have the same number of items " - "as the old categories!" - ) - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - with pytest.raises(ValueError, match=msg): - cat.categories = new_categories - # Combinations of sorted/unique: @pytest.mark.parametrize( "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]] From cdbed928f566f23f1799b71d5e00908d19501818 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 25 Oct 2022 21:53:27 -0400 Subject: [PATCH 2/7] fix tests --- pandas/tests/frame/indexing/test_setitem.py | 7 +--- .../series/accessors/test_cat_accessor.py | 40 +++---------------- pandas/tests/series/test_constructors.py | 9 ++--- 3 files changed, 9 insertions(+), 47 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 6e7ba57dddf8f..1c08a37c58e4e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1076,12 +1076,7 @@ def test_setitem_mask_categorical(self): df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf) exp_fancy = exp_multi_row.copy() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - # issue #37643 inplace kwarg deprecated - return_value = exp_fancy["cats"].cat.set_categories( - ["a", "b", "c"], inplace=True - ) - assert return_value is None + exp_fancy["cats"] = exp_fancy["cats"].cat.set_categories(["a", "b", "c"]) mask = df["cats"] == "c" df[mask] = ["b", 2] diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py index 48a01f0018775..54a3ddd24a203 100644 --- a/pandas/tests/series/accessors/test_cat_accessor.py +++ b/pandas/tests/series/accessors/test_cat_accessor.py @@ -46,13 +46,6 @@ def test_cat_accessor(self): exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - # issue #37643 inplace kwarg deprecated - return_value = ser.cat.set_categories(["b", "a"], inplace=True) - - assert return_value is None - tm.assert_categorical_equal(ser.values, exp) - res = ser.cat.set_categories(["b", "a"]) tm.assert_categorical_equal(res.values, exp) @@ -99,8 +92,7 @@ def test_categorical_delegations(self): ser = Series(Categorical(["a", "b", "c", "a"], ordered=True)) exp_categories = Index(["a", "b", "c"]) tm.assert_index_equal(ser.cat.categories, exp_categories) - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - ser.cat.categories = [1, 2, 3] + ser = ser.cat.rename_categories([1, 2, 3]) exp_categories = Index([1, 2, 3]) tm.assert_index_equal(ser.cat.categories, exp_categories) @@ -110,9 +102,8 @@ def test_categorical_delegations(self): assert ser.cat.ordered ser = ser.cat.as_unordered() assert not ser.cat.ordered - with tm.assert_produces_warning(FutureWarning, match="The `inplace`"): - return_value = ser.cat.as_ordered(inplace=True) - assert return_value is None + + ser = ser.cat.as_ordered() assert ser.cat.ordered # reorder @@ -234,34 +225,13 @@ def test_dt_accessor_api_for_categorical_invalid(self): invalid.dt assert not hasattr(invalid, "str") - def test_reorder_categories_updates_dtype(self): - # GH#43232 - ser = Series(["a", "b", "c"], dtype="category") - orig_dtype = ser.dtype - - # Need to construct this before calling reorder_categories inplace - expected = ser.cat.reorder_categories(["c", "b", "a"]) - - with tm.assert_produces_warning(FutureWarning, match="`inplace` parameter"): - ser.cat.reorder_categories(["c", "b", "a"], inplace=True) - - assert not orig_dtype.categories.equals(ser.dtype.categories) - assert not orig_dtype.categories.equals(expected.dtype.categories) - assert ser.dtype == expected.dtype - assert ser.dtype.categories.equals(expected.dtype.categories) - - tm.assert_series_equal(ser, expected) - def test_set_categories_setitem(self): # GH#43334 df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category") - # change the dtype in-place - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - df["Survived"].cat.categories = ["No", "Yes"] - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - df["Sex"].cat.categories = ["female", "male"] + df["Survived"] = df["Survived"].cat.rename_categories(["No", "Yes"]) + df["Sex"] = df["Sex"].cat.rename_categories(["female", "male"]) # values should not be coerced to NaN assert list(df["Sex"]) == ["female", "male", "male"] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 35ebd152f447c..91cc9d7e51486 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -478,8 +478,7 @@ def test_categorical_sideeffects_free(self): cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=True) assert s.cat is not cat - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - s.cat.categories = [1, 2, 3] + s = s.cat.rename_categories([1, 2, 3]) exp_s = np.array([1, 2, 3, 1], dtype=np.int64) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(s.__array__(), exp_s) @@ -496,16 +495,14 @@ def test_categorical_sideeffects_free(self): cat = Categorical(["a", "b", "c", "a"]) s = Series(cat) assert s.values is cat - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - s.cat.categories = [1, 2, 3] + s = s.cat.rename_categories([1, 2, 3]) + assert s.values is not cat exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s) - tm.assert_numpy_array_equal(cat.__array__(), exp_s) s[0] = 2 exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s2) - tm.assert_numpy_array_equal(cat.__array__(), exp_s2) def test_unordered_compare_equal(self): left = Series(["a", "b", "c"], dtype=CategoricalDtype(["a", "b"])) From 41d0092bdcf3a13047fc454d2db154c81737984c Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 25 Oct 2022 22:24:11 -0400 Subject: [PATCH 3/7] add back test --- pandas/tests/arrays/categorical/test_indexing.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 6571b4d0ce4a8..d42b73b7c0020 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -202,6 +202,16 @@ def test_periodindex_on_null_types(self, null_val): assert result[2] is NaT tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) + def test_categories_assignments_wrong_length_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"]) + msg = ( + "new categories need to have the same number of items " + "as the old categories!" + ) + with pytest.raises(ValueError, match=msg): + cat.rename_categories(new_categories) + # Combinations of sorted/unique: @pytest.mark.parametrize( "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]] From d18cace1d57a2584e811c5a1a7759eebdc0e0534 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 26 Oct 2022 05:40:39 -0400 Subject: [PATCH 4/7] doc fix --- doc/source/user_guide/categorical.rst | 45 ++------------------------- 1 file changed, 2 insertions(+), 43 deletions(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index b5cb1d83a9f52..d9896aec05e15 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -353,11 +353,6 @@ Renaming categories is done by using the In contrast to R's ``factor``, categorical data can have categories of other types than string. -.. note:: - - Be aware that assigning new categories is an inplace operation, while most other operations - under ``Series.cat`` per default return a new ``Series`` of dtype ``category``. - Categories must be unique or a ``ValueError`` is raised: .. ipython:: python @@ -952,7 +947,6 @@ categorical (categories and ordering). So if you read back the CSV file you have relevant columns back to ``category`` and assign the right categories and categories ordering. .. ipython:: python - :okwarning: import io @@ -969,8 +963,8 @@ relevant columns back to ``category`` and assign the right categories and catego df2["cats"] # Redo the category df2["cats"] = df2["cats"].astype("category") - df2["cats"].cat.set_categories( - ["very bad", "bad", "medium", "good", "very good"], inplace=True + df2["cats"] = df2["cats"].cat.set_categories( + ["very bad", "bad", "medium", "good", "very good"] ) df2.dtypes df2["cats"] @@ -1153,38 +1147,3 @@ Setting the index will create a ``CategoricalIndex``: df.index # This now sorts by the categories order df.sort_index() - -Side effects -~~~~~~~~~~~~ - -Constructing a ``Series`` from a ``Categorical`` will not copy the input -``Categorical``. This means that changes to the ``Series`` will in most cases -change the original ``Categorical``: - -.. ipython:: python - :okwarning: - - cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) - s = pd.Series(cat, name="cat") - cat - s.iloc[0:2] = 10 - cat - df = pd.DataFrame(s) - df["cat"].cat.categories = [1, 2, 3, 4, 5] - cat - -Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categoricals``: - -.. ipython:: python - - cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) - s = pd.Series(cat, name="cat", copy=True) - cat - s.iloc[0:2] = 10 - cat - -.. note:: - - This also happens in some cases when you supply a NumPy array instead of a ``Categorical``: - using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using - a string array (e.g. ``np.array(["a","b","c","a"])``) will not. From 6b5528e0f85f2436744ef8f7af7c31d3fa7ad19c Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 26 Oct 2022 07:51:50 -0400 Subject: [PATCH 5/7] doc fixes --- doc/source/whatsnew/v0.15.0.rst | 3 +-- doc/source/whatsnew/v0.19.0.rst | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst index 04506f1655c7d..f52253687ecfd 100644 --- a/doc/source/whatsnew/v0.15.0.rst +++ b/doc/source/whatsnew/v0.15.0.rst @@ -70,7 +70,6 @@ For full docs, see the :ref:`categorical introduction ` and the :ref:`API documentation `. .. ipython:: python - :okwarning: df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) @@ -79,7 +78,7 @@ For full docs, see the :ref:`categorical introduction ` and the df["grade"] # Rename the categories - df["grade"].cat.categories = ["very good", "good", "very bad"] + df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"]) # Reorder the categories and simultaneously add the missing categories df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 0c992cf3cc462..feeb7b5ee30ce 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -271,12 +271,12 @@ Individual columns can be parsed as a ``Categorical`` using a dict specification such as :func:`to_datetime`. .. ipython:: python - :okwarning: df = pd.read_csv(StringIO(data), dtype="category") df.dtypes df["col3"] - df["col3"].cat.categories = pd.to_numeric(df["col3"].cat.categories) + new_categories = pd.to_numeric(df["col3"].cat.categories) + df["col3"] = df["col3"].cat.rename_categories(new_categories) df["col3"] .. _whatsnew_0190.enhancements.union_categoricals: From 45a8510f8f9ca6c1f23c63e6dd13abdec87774dc Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 26 Oct 2022 19:02:41 -0400 Subject: [PATCH 6/7] avoid constructing new objects on every iteration --- doc/source/user_guide/categorical.rst | 31 +++++++++++++++ pandas/core/arrays/categorical.py | 56 +++++++++++++++------------ 2 files changed, 62 insertions(+), 25 deletions(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index d9896aec05e15..f3d68f4c471c1 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -1147,3 +1147,34 @@ Setting the index will create a ``CategoricalIndex``: df.index # This now sorts by the categories order df.sort_index() + +Side effects +~~~~~~~~~~~~ + +Constructing a ``Series`` from a ``Categorical`` will not copy the input +``Categorical``. This means that changes to the ``Series`` will in most cases +change the original ``Categorical``: + +.. ipython:: python + + cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + s = pd.Series(cat, name="cat") + cat + s.iloc[0:2] = 10 + cat + +Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categoricals``: + +.. ipython:: python + + cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + s = pd.Series(cat, name="cat", copy=True) + cat + s.iloc[0:2] = 10 + cat + +.. note:: + + This also happens in some cases when you supply a NumPy array instead of a ``Categorical``: + using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using + a string array (e.g. ``np.array(["a","b","c","a"])``) will not. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2e0b3ceb3bd71..8eb85add7c514 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2289,34 +2289,40 @@ def _replace(self, *, to_replace, value, inplace: bool = False): inplace = validate_bool_kwarg(inplace, "inplace") cat = self if inplace else self.copy() - # build a dict of (to replace -> value) pairs - if is_list_like(to_replace): - # if to_replace is list-like and value is scalar - replace_dict = {replace_value: value for replace_value in to_replace} - else: - # if both to_replace and value are scalar - replace_dict = {to_replace: value} - # other cases, like if both to_replace and value are list-like or if # to_replace is a dict, are handled separately in NDFrame - for replace_value, new_value in replace_dict.items(): - if new_value == replace_value: + if not is_list_like(to_replace): + to_replace = [to_replace] + + categories = cat.categories.tolist() + removals = set() + for replace_value in to_replace: + if value == replace_value: + continue + if replace_value not in cat.categories: continue - if replace_value in cat.categories: - if isna(new_value): - cat = cat.remove_categories(replace_value) - continue - - categories = cat.categories.tolist() - index = categories.index(replace_value) - - if new_value in cat.categories: - value_index = categories.index(new_value) - cat._codes[cat._codes == index] = value_index - cat = cat.remove_categories(replace_value) - else: - categories[index] = new_value - cat = cat.rename_categories(categories) + if isna(value): + removals.add(replace_value) + continue + + index = categories.index(replace_value) + + if value in cat.categories: + value_index = categories.index(value) + cat._codes[cat._codes == index] = value_index + removals.add(replace_value) + else: + categories[index] = value + cat._set_categories(categories) + + if len(removals): + new_categories = [c for c in categories if c not in removals] + new_dtype = CategoricalDtype(new_categories, ordered=self.dtype.ordered) + codes = recode_for_categories( + cat.codes, cat.categories, new_dtype.categories + ) + NDArrayBacked.__init__(cat, codes, new_dtype) + if not inplace: return cat NDArrayBacked.__init__(self, cat._ndarray, cat.dtype) From 25b1c28de6059a8b50a10b3d273c2e84d8d7b3c2 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Fri, 28 Oct 2022 17:59:25 -0400 Subject: [PATCH 7/7] cleanup --- pandas/core/arrays/categorical.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index fea6c6b04b1dc..becca2b668290 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -40,7 +40,6 @@ type_t, ) from pandas.compat.numpy import function as nv -from pandas.util._decorators import deprecate_nonkeyword_arguments from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg @@ -2326,7 +2325,6 @@ def _replace(self, *, to_replace, value, inplace: bool = False): if not inplace: return cat - NDArrayBacked.__init__(self, cat._ndarray, cat.dtype) # ------------------------------------------------------------------------ # String methods interface