diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1bf40f782f666..78b2accf92752 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -180,6 +180,9 @@ "axis": """axis : {0 or 'index', 1 or 'columns'}, default 0 If 0 or 'index': apply function to each column. If 1 or 'columns': apply function to each row.""", + "inplace": """ + inplace : boolean, default False + If True, performs operation inplace and returns None.""", "optional_by": """ by : str or list of str Name or list of names to sort by. @@ -193,6 +196,9 @@ "optional_axis": """axis : int or str, optional Axis to target. Can be either the axis name ('index', 'columns') or number (0, 1).""", + "replace_iloc": """ + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", } _numeric_only_doc = """numeric_only : boolean, default None diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f9aa5ca9e8ea9..8e2f21bba5dfd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -133,9 +133,15 @@ "klass": "Series/DataFrame", "axes_single_arg": "int or labels for object", "args_transpose": "axes to permute (int or label for object)", + "inplace": """ + inplace : boolean, default False + If True, performs operation inplace and returns None.""", "optional_by": """ by : str or list of str Name or list of names to sort by""", + "replace_iloc": """ + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", } @@ -6469,7 +6475,12 @@ def bfill( backfill = bfill - @doc(klass=_shared_doc_kwargs["klass"]) + @doc( + _shared_docs["replace"], + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], + ) def replace( self, to_replace=None, @@ -6479,282 +6490,6 @@ def replace( regex=False, method="pad", ): - """ - Replace values given in `to_replace` with `value`. - - Values of the {klass} are replaced with other values dynamically. - This differs from updating with ``.loc`` or ``.iloc``, which require - you to specify a location to update with some value. - - Parameters - ---------- - to_replace : str, regex, list, dict, Series, int, float, or None - How to find the values that will be replaced. - - * numeric, str or regex: - - - numeric: numeric values equal to `to_replace` will be - replaced with `value` - - str: string exactly matching `to_replace` will be replaced - with `value` - - regex: regexs matching `to_replace` will be replaced with - `value` - - * list of str, regex, or numeric: - - - First, if `to_replace` and `value` are both lists, they - **must** be the same length. - - Second, if ``regex=True`` then all of the strings in **both** - lists will be interpreted as regexs otherwise they will match - directly. This doesn't matter much for `value` since there - are only a few possible substitution regexes you can use. - - str, regex and numeric rules apply as above. - - * dict: - - - Dicts can be used to specify different replacement values - for different existing values. For example, - ``{{'a': 'b', 'y': 'z'}}`` replaces the value 'a' with 'b' and - 'y' with 'z'. To use a dict in this way the `value` - parameter should be `None`. - - For a DataFrame a dict can specify that different values - should be replaced in different columns. For example, - ``{{'a': 1, 'b': 'z'}}`` looks for the value 1 in column 'a' - and the value 'z' in column 'b' and replaces these values - with whatever is specified in `value`. The `value` parameter - should not be ``None`` in this case. You can treat this as a - special case of passing two lists except that you are - specifying the column to search in. - - For a DataFrame nested dictionaries, e.g., - ``{{'a': {{'b': np.nan}}}}``, are read as follows: look in column - 'a' for the value 'b' and replace it with NaN. The `value` - parameter should be ``None`` to use a nested dict in this - way. You can nest regular expressions as well. Note that - column names (the top-level dictionary keys in a nested - dictionary) **cannot** be regular expressions. - - * None: - - - This means that the `regex` argument must be a string, - compiled regular expression, or list, dict, ndarray or - Series of such elements. If `value` is also ``None`` then - this **must** be a nested dictionary or Series. - - See the examples section for examples of each of these. - value : scalar, dict, list, str, regex, default None - Value to replace any values matching `to_replace` with. - For a DataFrame a dict of values can be used to specify which - value to use for each column (columns not in the dict will not be - filled). Regular expressions, strings and lists or dicts of such - objects are also allowed. - inplace : bool, default False - If True, in place. Note: this will modify any - other views on this object (e.g. a column from a DataFrame). - Returns the caller if this is True. - limit : int or None, default None - Maximum size gap to forward or backward fill. - regex : bool or same types as `to_replace`, default False - Whether to interpret `to_replace` and/or `value` as regular - expressions. If this is ``True`` then `to_replace` *must* be a - string. Alternatively, this could be a regular expression or a - list, dict, or array of regular expressions in which case - `to_replace` must be ``None``. - method : {{'pad', 'ffill', 'bfill', `None`}} - The method to use when for replacement, when `to_replace` is a - scalar, list or tuple and `value` is ``None``. - - Returns - ------- - {klass} or None - Object after replacement or None if ``inplace=True``. - - Raises - ------ - AssertionError - * If `regex` is not a ``bool`` and `to_replace` is not - ``None``. - - TypeError - * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` - * If `to_replace` is a ``dict`` and `value` is not a ``list``, - ``dict``, ``ndarray``, or ``Series`` - * If `to_replace` is ``None`` and `regex` is not compilable - into a regular expression or is a list, dict, ndarray, or - Series. - * When replacing multiple ``bool`` or ``datetime64`` objects and - the arguments to `to_replace` does not match the type of the - value being replaced - - ValueError - * If a ``list`` or an ``ndarray`` is passed to `to_replace` and - `value` but they are not the same length. - - See Also - -------- - {klass}.fillna : Fill NA values. - {klass}.where : Replace values based on boolean condition. - Series.str.replace : Simple string replacement. - - Notes - ----- - * Regex substitution is performed under the hood with ``re.sub``. The - rules for substitution for ``re.sub`` are the same. - * Regular expressions will only substitute on strings, meaning you - cannot provide, for example, a regular expression matching floating - point numbers and expect the columns in your frame that have a - numeric dtype to be matched. However, if those floating point - numbers *are* strings, then you can do this. - * This method has *a lot* of options. You are encouraged to experiment - and play with this method to gain intuition about how it works. - * When dict is used as the `to_replace` value, it is like - key(s) in the dict are the to_replace part and - value(s) in the dict are the value parameter. - - Examples - -------- - - **Scalar `to_replace` and `value`** - - >>> s = pd.Series([0, 1, 2, 3, 4]) - >>> s.replace(0, 5) - 0 5 - 1 1 - 2 2 - 3 3 - 4 4 - dtype: int64 - - >>> df = pd.DataFrame({{'A': [0, 1, 2, 3, 4], - ... 'B': [5, 6, 7, 8, 9], - ... 'C': ['a', 'b', 'c', 'd', 'e']}}) - >>> df.replace(0, 5) - A B C - 0 5 5 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - **List-like `to_replace`** - - >>> df.replace([0, 1, 2, 3], 4) - A B C - 0 4 5 a - 1 4 6 b - 2 4 7 c - 3 4 8 d - 4 4 9 e - - >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) - A B C - 0 4 5 a - 1 3 6 b - 2 2 7 c - 3 1 8 d - 4 4 9 e - - >>> s.replace([1, 2], method='bfill') - 0 0 - 1 3 - 2 3 - 3 3 - 4 4 - dtype: int64 - - **dict-like `to_replace`** - - >>> df.replace({{0: 10, 1: 100}}) - A B C - 0 10 5 a - 1 100 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - >>> df.replace({{'A': 0, 'B': 5}}, 100) - A B C - 0 100 100 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - >>> df.replace({{'A': {{0: 100, 4: 400}}}}) - A B C - 0 100 5 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 400 9 e - - **Regular expression `to_replace`** - - >>> df = pd.DataFrame({{'A': ['bat', 'foo', 'bait'], - ... 'B': ['abc', 'bar', 'xyz']}}) - >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) - A B - 0 new abc - 1 foo new - 2 bait xyz - - >>> df.replace({{'A': r'^ba.$'}}, {{'A': 'new'}}, regex=True) - A B - 0 new abc - 1 foo bar - 2 bait xyz - - >>> df.replace(regex=r'^ba.$', value='new') - A B - 0 new abc - 1 foo new - 2 bait xyz - - >>> df.replace(regex={{r'^ba.$': 'new', 'foo': 'xyz'}}) - A B - 0 new abc - 1 xyz new - 2 bait xyz - - >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') - A B - 0 new abc - 1 new new - 2 bait xyz - - Compare the behavior of ``s.replace({{'a': None}})`` and - ``s.replace('a', None)`` to understand the peculiarities - of the `to_replace` parameter: - - >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) - - When one uses a dict as the `to_replace` value, it is like the - value(s) in the dict are equal to the `value` parameter. - ``s.replace({{'a': None}})`` is equivalent to - ``s.replace(to_replace={{'a': None}}, value=None, method=None)``: - - >>> s.replace({{'a': None}}) - 0 10 - 1 None - 2 None - 3 b - 4 None - dtype: object - - When ``value=None`` and `to_replace` is a scalar, list or - tuple, `replace` uses the method parameter (default 'pad') to do the - replacement. So this is why the 'a' values are being replaced by 10 - in rows 1 and 2 and 'b' in row 4 in this case. - The command ``s.replace('a', None)`` is actually equivalent to - ``s.replace(to_replace='a', value=None, method='pad')``: - - >>> s.replace('a', None) - 0 10 - 1 10 - 2 10 - 3 b - 4 b - dtype: object - """ if not ( is_scalar(to_replace) or is_re_compilable(to_replace) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0e9476285c258..4351c21c5c7e5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -125,6 +125,9 @@ "optional_mapper": "", "optional_labels": "", "optional_axis": "", + "replace_iloc": """ + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", } @@ -4473,7 +4476,12 @@ def pop(self, item: Label) -> Any: """ return super().pop(item=item) - @doc(NDFrame.replace, klass=_shared_doc_kwargs["klass"]) + @doc( + NDFrame.replace, + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], + ) def replace( self, to_replace=None, diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 6d3249802ee5e..ad2eafe7295b0 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -387,3 +387,281 @@ are forwarded to ``urllib`` as header options. For other URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are forwarded to ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.""" + +_shared_docs[ + "replace" +] = """ + Replace values given in `to_replace` with `value`. + + Values of the {klass} are replaced with other values dynamically. + {replace_iloc} + + Parameters + ---------- + to_replace : str, regex, list, dict, Series, int, float, or None + How to find the values that will be replaced. + + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexs matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + ``{{'a': 'b', 'y': 'z'}}`` replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the `value` + parameter should be `None`. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + ``{{'a': 1, 'b': 'z'}}`` looks for the value 1 in column 'a' + and the value 'z' in column 'b' and replaces these values + with whatever is specified in `value`. The `value` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - For a DataFrame nested dictionaries, e.g., + ``{{'a': {{'b': np.nan}}}}``, are read as follows: look in column + 'a' for the value 'b' and replace it with NaN. The `value` + parameter should be ``None`` to use a nested dict in this + way. You can nest regular expressions as well. Note that + column names (the top-level dictionary keys in a nested + dictionary) **cannot** be regular expressions. + + * None: + + - This means that the `regex` argument must be a string, + compiled regular expression, or list, dict, ndarray or + Series of such elements. If `value` is also ``None`` then + this **must** be a nested dictionary or Series. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + {inplace} + limit : int, default None + Maximum size gap to forward or backward fill. + regex : bool or same types as `to_replace`, default False + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + `to_replace` must be ``None``. + method : {{'pad', 'ffill', 'bfill', `None`}} + The method to use when for replacement, when `to_replace` is a + scalar, list or tuple and `value` is ``None``. + + .. versionchanged:: 0.23.0 + Added to DataFrame. + + Returns + ------- + {klass} + Object after replacement. + + Raises + ------ + AssertionError + * If `regex` is not a ``bool`` and `to_replace` is not + ``None``. + + TypeError + * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` + * If `to_replace` is a ``dict`` and `value` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If `to_replace` is ``None`` and `regex` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. + * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to `to_replace` does not match the type of the + value being replaced + + ValueError + * If a ``list`` or an ``ndarray`` is passed to `to_replace` and + `value` but they are not the same length. + + See Also + -------- + {klass}.fillna : Fill NA values. + {klass}.where : Replace values based on boolean condition. + Series.str.replace : Simple string replacement. + + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + * When dict is used as the `to_replace` value, it is like + key(s) in the dict are the to_replace part and + value(s) in the dict are the value parameter. + + Examples + -------- + + **Scalar `to_replace` and `value`** + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.replace(0, 5) + 0 5 + 1 1 + 2 2 + 3 3 + 4 4 + dtype: int64 + + >>> df = pd.DataFrame({{'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 'C': ['a', 'b', 'c', 'd', 'e']}}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + **List-like `to_replace`** + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + >>> s.replace([1, 2], method='bfill') + 0 0 + 1 3 + 2 3 + 3 3 + 4 4 + dtype: int64 + + **dict-like `to_replace`** + + >>> df.replace({{0: 10, 1: 100}}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': 0, 'B': 5}}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': {{0: 100, 4: 400}}}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + **Regular expression `to_replace`** + + >>> df = pd.DataFrame({{'A': ['bat', 'foo', 'bait'], + ... 'B': ['abc', 'bar', 'xyz']}}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace({{'A': r'^ba.$'}}, {{'A': 'new'}}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace(regex={{r'^ba.$': 'new', 'foo': 'xyz'}}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Compare the behavior of ``s.replace({{'a': None}})`` and + ``s.replace('a', None)`` to understand the peculiarities + of the `to_replace` parameter: + + >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) + + When one uses a dict as the `to_replace` value, it is like the + value(s) in the dict are equal to the `value` parameter. + ``s.replace({{'a': None}})`` is equivalent to + ``s.replace(to_replace={{'a': None}}, value=None, method=None)``: + + >>> s.replace({{'a': None}}) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + When ``value=None`` and `to_replace` is a scalar, list or + tuple, `replace` uses the method parameter (default 'pad') to do the + replacement. So this is why the 'a' values are being replaced by 10 + in rows 1 and 2 and 'b' in row 4 in this case. + The command ``s.replace('a', None)`` is actually equivalent to + ``s.replace(to_replace='a', value=None, method='pad')``: + + >>> s.replace('a', None) + 0 10 + 1 10 + 2 10 + 3 b + 4 b + dtype: object +"""