From 2fb2fdb268603252a7f6519ab50d1f5f65770446 Mon Sep 17 00:00:00 2001
From: Luke Manley
Date: Sun, 29 Jan 2023 16:06:25 -0500
Subject: [PATCH 1/2] BUG: Series(category).replace to maintain order of categories

---
 pandas/core/arrays/categorical.py               |  8 +++++++-
 pandas/tests/arrays/categorical/test_replace.py | 10 ++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 5b61695410474..bbdb3ea7ea974 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2284,7 +2284,13 @@ def _replace(self, *, to_replace, value, inplace: bool = False):
         ser = ser.replace(to_replace=to_replace, value=value)
 
         all_values = Index(ser)
-        new_categories = Index(ser.drop_duplicates(keep="first"))
+        new_categories = ser.drop_duplicates(keep="first")
+
+        # GH51016: maintain order of existing categories
+        overlap = cat.categories[cat.categories.isin(new_categories)]
+        new_categories[new_categories.isin(overlap)] = overlap
+
+        new_categories = Index(new_categories)
         new_codes = recode_for_categories(
             cat._codes, all_values, new_categories, copy=False
         )
diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
index c25f1d9c9feac..ee9e1dbc81e12 100644
--- a/pandas/tests/arrays/categorical/test_replace.py
+++ b/pandas/tests/arrays/categorical/test_replace.py
@@ -78,3 +78,13 @@ def test_replace_categorical_ea_dtype():
     result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
     expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
     tm.assert_categorical_equal(result, expected)
+
+
+def test_replace_maintain_ordering():
+    # GH51016
+    dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
+    ser = pd.Series([0, 1, 2], dtype=dtype)
+    result = ser.replace(0, 2)
+    expected_dtype = pd.CategoricalDtype([1, 2], ordered=True)
+    expected = pd.Series([2, 1, 2], dtype=expected_dtype)
+    tm.assert_series_equal(expected, result, check_category_order=True)

From c12d40b6ed5f0727d65a345bcf295a9487ec8a3b Mon Sep 17 00:00:00 2001
From: Luke Manley
Date: Sun, 29 Jan 2023 20:51:22 -0500
Subject: [PATCH 2/2] fix

---
 pandas/core/arrays/categorical.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index bbdb3ea7ea974..6c497b3a7b6d8 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2284,12 +2284,15 @@ def _replace(self, *, to_replace, value, inplace: bool = False):
         ser = ser.replace(to_replace=to_replace, value=value)
 
         all_values = Index(ser)
-        new_categories = ser.drop_duplicates(keep="first")
 
         # GH51016: maintain order of existing categories
-        overlap = cat.categories[cat.categories.isin(new_categories)]
-        new_categories[new_categories.isin(overlap)] = overlap
+        idxr = cat.categories.get_indexer_for(all_values)
+        locs = np.arange(len(ser))
+        locs = np.where(idxr == -1, locs, idxr)
+        locs = locs.argsort()
 
+        new_categories = ser.take(locs)
+        new_categories = new_categories.drop_duplicates(keep="first")
         new_categories = Index(new_categories)
         new_codes = recode_for_categories(
             cat._codes, all_values, new_categories, copy=False