diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index 24ba9be4383eb..276b015450212 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -61,6 +61,7 @@ Other
 - Bumped minimum pymysql version to 0.8.1 to avoid test failures (:issue:`38344`)
 - Fixed build failure on MacOS 11 in Python 3.9.1 (:issue:`38766`)
 - Added reference to backwards incompatible ``check_freq`` arg of :func:`testing.assert_frame_equal` and :func:`testing.assert_series_equal` in :ref:`pandas 1.1.0 whats new <whatsnew_110.api_breaking.testing.check_freq>` (:issue:`34050`)
+- :class:`Index` and :class:`MultiIndex` now have a ``replace()`` method (:issue:`19495`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 0b46b43514d92..a89d5c23ee998 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -98,6 +98,7 @@
 from pandas.core.indexes.frozen import FrozenList
 from pandas.core.ops import get_op_result_name
 from pandas.core.ops.invalid import make_invalid_op
+from pandas.core.shared_docs import _shared_docs
 from pandas.core.sorting import ensure_key_mapped, nargsort
 from pandas.core.strings import StringMethods
 
@@ -124,6 +125,7 @@
     "raises_section": "",
     "unique": "Index",
     "duplicated": "np.ndarray",
+    "replace_iloc": "",
 }
 _index_shared_docs = {}
 str_t = str
@@ -1536,6 +1538,27 @@ def rename(self, name, inplace=False):
         """
         return self.set_names([name], inplace=inplace)
 
+    @doc(
+        _shared_docs["replace"],
+        klass=_index_doc_kwargs["klass"],
+        inplace=_index_doc_kwargs["inplace"],
+        replace_iloc=_index_doc_kwargs["replace_iloc"],
+    )
+    def replace(
+        self,
+        to_replace=None,
+        value=None,
+        limit=None,
+        regex=False,
+        method="pad",
+    ):
+        # Delegate to Series.replace so that Index follows the same replacement
+        # rules; the replaced Series is then wrapped back into an Index.
+        replaced = self.to_series().replace(
+            to_replace=to_replace, value=value, limit=limit, regex=regex, method=method
+        )
+        return Index(replaced)
+
     # --------------------------------------------------------------------
     # Level-Centric Methods
 
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index a8a872ff38fb8..4b85864c455f1 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -628,3 +628,36 @@ def _delegate_method(self, name: str, *args, **kwargs):
         if is_scalar(res):
             return res
         return CategoricalIndex(res, name=self.name)
+
+    def replace(
+        self,
+        to_replace=None,
+        value=None,
+        limit=None,
+        regex=False,
+        method="pad",
+    ):
+        """
+        Replace values given in `to_replace` with `value`.
+
+        The arguments are forwarded unchanged to :meth:`Series.replace`.
+
+        Returns
+        -------
+        CategoricalIndex
+
+        Raises
+        ------
+        NotImplementedError
+            If `regex` is anything other than ``False``.
+        """
+        if regex is not False:
+            raise NotImplementedError(
+                "Regex replace is not yet implemented for CategoricalIndex."
+            )
+
+        replaced = self.to_series().replace(
+            to_replace=to_replace, value=value, limit=limit, regex=regex, method=method
+        )
+
+        return CategoricalIndex(replaced)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index a04933fc5ddfc..b9232d759abde 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -58,6 +58,7 @@
 from pandas.core.indexes.frozen import FrozenList
 from pandas.core.indexes.numeric import Int64Index
 from pandas.core.ops.invalid import make_invalid_op
+from pandas.core.shared_docs import _shared_docs
 from pandas.core.sorting import (
     get_group_index,
     indexer_from_factorized,
@@ -3776,6 +3777,29 @@ def isin(self, values, level=None):
     __abs__ = make_invalid_op("__abs__")
     __inv__ = make_invalid_op("__inv__")
 
+    @doc(
+        _shared_docs["replace"],
+        klass=_index_doc_kwargs["klass"],
+        inplace=_index_doc_kwargs["inplace"],
+        replace_iloc=_index_doc_kwargs["replace_iloc"],
+    )
+    def replace(
+        self,
+        to_replace=None,
+        value=None,
+        limit=None,
+        regex=False,
+        method="pad",
+    ):
+        # Run the replacement on a frame holding one column per level. Only the
+        # columns are read back, so skip attaching this index to the frame.
+        frame = self.to_frame(index=False)
+        replaced = frame.replace(
+            to_replace=to_replace, value=value, limit=limit, regex=regex, method=method
+        )
+
+        return self.from_frame(replaced, names=self.names)
+
 
 def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int:
     """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 1b570028964df..2c825f85233ad 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1637,6 +1637,15 @@ def test_replace_unicode(self):
         expected = DataFrame({"positive": np.ones(3)})
         tm.assert_frame_equal(result, expected)
 
+    def test_replace_multiple_bool_datetime_type_mismatch(self):
+        # See https://github.com/pandas-dev/pandas/pull/32542#discussion_r528338117
+        df = DataFrame({"A": [True, False, True], "B": [False, True, False]})
+
+        result = df.replace({"a string": "new value", True: False})
+        expected = DataFrame({"A": [False, False, False], "B": [False, False, False]})
+
+        tm.assert_frame_equal(result, expected)
+
     def test_replace_bytes(self, frame_or_series):
         # GH#38900
         obj = frame_or_series(["o"]).astype("|S")
diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py
new file mode 100644
index 0000000000000..1887b171ef5cb
--- /dev/null
+++ b/pandas/tests/indexes/base_class/test_replace.py
@@ -0,0 +1,77 @@
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "index, to_replace, value, expected",
+    [
+        ([1, 2, 3], [1, 3], ["a", "c"], ["a", 2, "c"]),
+        ([1, 2, 3], 1, "a", ["a", 2, 3]),
+        (
+            [1, None, 2],
+            [1, 2],
+            "a",
+            ["a", None, "a"],
+        ),
+    ],
+)
+def test_index_replace(index, to_replace, value, expected):
+    index = pd.Index(index)
+    expected = pd.Index(expected)
+
+    result = index.replace(to_replace=to_replace, value=value)
+
+    tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "index, to_replace, value, regex, expected",
+    [
+        (
+            ["bat", "foo", "baait", "bar"],
+            r"^ba.$",
+            "new",
+            True,
+            ["new", "foo", "baait", "new"],
+        ),
+        (
+            ["bat", "foo", "baait", "bar"],
+            None,
+            None,
+            {r"^ba.$": "new", "foo": "xyz"},
+            ["new", "xyz", "baait", "new"],
+        ),
+    ],
+)
+def test_index_replace_regex(index, to_replace, value, regex, expected):
+    index = pd.Index(index)
+    expected = pd.Index(expected)
+
+    result = index.replace(to_replace=to_replace, value=value, regex=regex)
+    tm.assert_equal(result, expected)
+
+
+def test_index_replace_dict_and_value():
+    index = pd.Index([1, 2, 3])
+
+    msg = "Series.replace cannot use dict-like to_replace and non-None value"
+    with pytest.raises(ValueError, match=msg):
+        index.replace({1: "a", 3: "c"}, "x")
+
+
+def test_index_replace_bfill():
+    index = pd.Index([0, 1, 2, 3, 4])
+    expected = pd.Index([0, 3, 3, 3, 4])
+
+    result = index.replace([1, 2], method="bfill")
+    tm.assert_equal(result, expected)
+
+
+def test_index_name_preserved():
+    index = pd.Index(range(2), name="foo")
+    expected = pd.Index([0, 0], name="foo")
+
+    result = index.replace(1, 0)
+    tm.assert_equal(result, expected)
diff --git a/pandas/tests/indexes/categorical/test_replace.py b/pandas/tests/indexes/categorical/test_replace.py
new file mode 100644
index 0000000000000..a1e05b2c25148
--- /dev/null
+++ b/pandas/tests/indexes/categorical/test_replace.py
@@ -0,0 +1,55 @@
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "index, to_replace, value, expected",
+    [
+        ([1, 2, 3], 3, "a", [1, 2, "a"]),
+        (
+            [1, None, 2],
+            [1, 2],
+            "a",
+            ["a", None, "a"],
+        ),
+    ],
+)
+def test_categorical_index_replace(index, to_replace, value, expected):
+    index = pd.CategoricalIndex(index)
+    expected = pd.CategoricalIndex(expected)
+
+    result = index.replace(to_replace=to_replace, value=value)
+
+    tm.assert_equal(result, expected)
+
+
+def test_categorical_index_replace_dict_and_value():
+    index = pd.CategoricalIndex([1, 2, 3])
+
+    msg = "Series.replace cannot use dict-like to_replace and non-None value"
+    with pytest.raises(ValueError, match=msg):
+        index.replace({1: "a", 3: "c"}, "x")
+
+
+@pytest.mark.parametrize(
+    "index, to_replace, value, expected",
+    [
+        ([1, 2, 3], [2, 3], ["b", "c"], [1, "b", "c"]),
+        ([1, 2, 3], 3, "c", [1, 2, "c"]),
+        (
+            [1, None, 2],
+            [1, 2],
+            "a",
+            ["a", None, "a"],
+        ),
+    ],
+)
+def test_categorical_index_replace_list_like(index, to_replace, value, expected):
+    index = pd.CategoricalIndex(index)
+    expected = pd.CategoricalIndex(expected)
+
+    result = index.replace(to_replace=to_replace, value=value)
+
+    tm.assert_equal(result, expected)
diff --git a/pandas/tests/indexes/multi/test_replace.py b/pandas/tests/indexes/multi/test_replace.py
new file mode 100644
index 0000000000000..3b099680b7d30
--- /dev/null
+++ b/pandas/tests/indexes/multi/test_replace.py
@@ -0,0 +1,63 @@
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "names, arrays, to_replace, value, expected_arrays",
+    [
+        (
+            [None, None],
+            [[1, 1, 2, 2], ["red", "blue", "red", "blue"]],
+            [1, "red"],
+            [0, "black"],
+            [[0, 0, 2, 2], ["black", "blue", "black", "blue"]],
+        ),
+        # names should be preserved
+        (
+            ["digits", "colors"],
+            [[1, 1, 2, 2], ["red", "blue", "red", "blue"]],
+            1,
+            0,
+            [[0, 0, 2, 2], ["red", "blue", "red", "blue"]],
+        ),
+        (
+            [None, None],
+            [[1, 1, 2, 2], ["red", "blue", "red", "blue"]],
+            1,
+            0,
+            [[0, 0, 2, 2], ["red", "blue", "red", "blue"]],
+        ),
+        (
+            [None, None],
+            [[1, 1, 2, 2], ["red", "blue", "red", "blue"]],
+            [1, 2],
+            0,
+            [[0, 0, 0, 0], ["red", "blue", "red", "blue"]],
+        ),
+        # nested dicts
+        (
+            ["digits", "colors"],
+            [[1, 1, 2, 2], ["red", "blue", "red", "blue"]],
+            {"digits": {1: 0}, "colors": {"red": "black"}},
+            None,
+            [[0, 0, 2, 2], ["black", "blue", "black", "blue"]],
+        ),
+        # dicts and value
+        (
+            ["digits", "colors"],
+            [[1, 1, 2, 2], ["red", "blue", "red", "blue"]],
+            {"digits": [1], "colors": ["red", "blue"]},
+            "x",
+            [["x", "x", 2, 2], ["x", "x", "x", "x"]],
+        ),
+    ],
+)
+def test_multi_index_replace(names, arrays, to_replace, value, expected_arrays):
+    multi_index = pd.MultiIndex.from_arrays(arrays, names=names)
+    expected = pd.MultiIndex.from_arrays(expected_arrays, names=names)
+
+    result = multi_index.replace(to_replace=to_replace, value=value)
+
+    tm.assert_equal(result, expected)