|
20 | 20 | import warnings
|
21 | 21 | from textwrap import dedent
|
22 | 22 |
|
23 |
| -import pandas as pd |
24 | 23 | import numpy as np
|
25 | 24 | import numpy.ma as ma
|
26 | 25 |
|
|
43 | 42 | is_datetimetz,
|
44 | 43 | is_datetime64_any_dtype,
|
45 | 44 | is_datetime64tz_dtype,
|
46 |
| - is_bool, |
47 | 45 | is_bool_dtype,
|
48 | 46 | is_integer_dtype,
|
49 | 47 | is_float_dtype,
|
@@ -3069,218 +3067,9 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
|
3069 | 3067 | inplace=inplace, limit=limit,
|
3070 | 3068 | downcast=downcast, **kwargs)
|
3071 | 3069 |
|
| 3070 | + @Appender(_shared_docs['replace'] % _shared_doc_kwargs) |
3072 | 3071 | def replace(self, to_replace=None, value=None, inplace=False, limit=None,
|
3073 | 3072 | regex=False, method='pad', axis=None):
|
3074 |
| - """ |
3075 |
| - Replace values given in 'to_replace' with 'value'. |
3076 |
| -
|
3077 |
| - Parameters |
3078 |
| - ---------- |
3079 |
| - to_replace : str, regex, list, dict, Series, numeric, or None |
3080 |
| -
|
3081 |
| - * numeric, str or regex: |
3082 |
| -
|
3083 |
| - - numeric: numeric values equal to ``to_replace`` will be |
3084 |
| - replaced with ``value`` |
3085 |
| - - str: string exactly matching `to_replace` will be replaced |
3086 |
| - with ``value`` |
3087 |
| - - regex: regexes matching ``to_replace`` will be replaced with |
3088 |
| - ``value`` |
3089 |
| -
|
3090 |
| - * list of str, regex, or numeric: |
3091 |
| -
|
3092 |
| - - First, if ``to_replace`` and ``value`` are both lists, they |
3093 |
| - **must** be the same length. |
3094 |
| - - Second, if ``regex=True`` then all of the strings in **both** |
3095 |
| - lists will be interpreted as regexes; otherwise they will match |
3096 |
| - directly. This doesn't matter much for ``value`` since there |
3097 |
| - are only a few possible substitution regexes you can use. |
3098 |
| - - str and regex rules apply as above. |
3099 |
| -
|
3100 |
| - * dict: |
3101 |
| -
|
3102 |
| - - Dicts can be used to specify different replacement values |
3103 |
| - for different existing values. For example, |
3104 |
| - {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and |
3105 |
| - 'y' with 'z'. To use a dict in this way the ``value`` |
3106 |
| - parameter should be ``None``. |
3107 |
| - - Alternatively, a dict can specify that different values |
3108 |
| - should be replaced in different columns. For example, |
3109 |
| - {'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and |
3110 |
| - the value 'z' in column 'b' and replaces these values with |
3111 |
| - whatever is specified in ``value``. The ``value`` parameter |
3112 |
| - should not be ``None`` in this case. You can treat this as a |
3113 |
| - special case of passing two lists except that you are |
3114 |
| - specifying the column to search in. |
3115 |
| - - Nested dictionaries, e.g., {'a': {'b': np.nan}}, are read as |
3116 |
| - follows: look in column 'a' for the value 'b' and replace it |
3117 |
| - with NaN. The ``value`` parameter should be ``None`` to use |
3118 |
| - a nested dict in this way. You can nest regular expressions |
3119 |
| - as well. Note that column names (the top-level dictionary |
3120 |
| - keys in a nested dictionary) **cannot** be regular |
3121 |
| - expressions. |
3122 |
| -
|
3123 |
| - * None: |
3124 |
| -
|
3125 |
| - - This means that the ``regex`` argument must be a string, |
3126 |
| - compiled regular expression, or list, dict, ndarray or Series |
3127 |
| - of such elements. If ``value`` is also ``None`` then this |
3128 |
| - **must** be a nested dictionary or ``Series``. |
3129 |
| -
|
3130 |
| - See the examples section for examples of each of these. |
3131 |
| - value : scalar, dict, list, str, regex, default None |
3132 |
| - Value to replace any values matching ``to_replace`` with. |
3133 |
| - Alternatively, a dict of values specifying which value to use for |
3134 |
| - each column (columns not in the dict will not be filled). Regular |
3135 |
| - expressions, strings and lists or dicts of such objects are also |
3136 |
| - allowed. |
3137 |
| - inplace : boolean, default False |
3138 |
| - If True, perform the replacement in place. Note: this will modify any |
3139 |
| - other views on this object (e.g. a column from a DataFrame). |
3140 |
| - Returns the caller if this is True. |
3141 |
| - limit : int, default None |
3142 |
| - Maximum size gap to forward or backward fill |
3143 |
| - regex : bool or same types as `to_replace`, default False |
3144 |
| - Whether to interpret ``to_replace`` and/or ``value`` as regular |
3145 |
| - expressions. If this is ``True`` then ``to_replace`` *must* be a |
3146 |
| - string. Alternatively, this could be a regular expression or a |
3147 |
| - list, dict, or array of regular expressions in which case |
3148 |
| - ``to_replace`` must be ``None``. |
3149 |
| - method : string, optional, {'pad', 'ffill', 'bfill'} |
3150 |
| - The method to use for replacement, when ``to_replace`` is a |
3151 |
| - ``list``. |
3152 |
| -
|
3153 |
| - See Also |
3154 |
| - -------- |
3155 |
| - DataFrame.fillna : Fill NA/NaN values |
3156 |
| - DataFrame.where : Replace values based on boolean condition |
3157 |
| -
|
3158 |
| - Returns |
3159 |
| - ------- |
3160 |
| - filled : DataFrame |
3161 |
| -
|
3162 |
| - Raises |
3163 |
| - ------ |
3164 |
| - AssertionError |
3165 |
| - * If ``regex`` is not a ``bool`` and ``to_replace`` is not |
3166 |
| - ``None``. |
3167 |
| - TypeError |
3168 |
| - * If ``to_replace`` is a ``dict`` and `value` is not a ``list``, |
3169 |
| - ``dict``, ``ndarray``, or ``Series`` |
3170 |
| - * If ``to_replace`` is ``None`` and ``regex`` is not compilable |
3171 |
| - into a regular expression or is a list, dict, ndarray, or |
3172 |
| - Series. |
3173 |
| - * When replacing multiple ``bool`` or ``datetime64`` objects and |
3174 |
| - the arguments to `to_replace` do not match the type of the |
3175 |
| - value being replaced |
3176 |
| - ValueError |
3177 |
| - * If a ``list`` or an ``ndarray`` is passed to `to_replace` and |
3178 |
| - `value` but they are not the same length. |
3179 |
| -
|
3180 |
| - Notes |
3181 |
| - ----- |
3182 |
| - * Regex substitution is performed under the hood with ``re.sub``. The |
3183 |
| - rules for substitution for ``re.sub`` are the same. |
3184 |
| - * Regular expressions will only substitute on strings, meaning you |
3185 |
| - cannot provide, for example, a regular expression matching floating |
3186 |
| - point numbers and expect the columns in your frame that have a |
3187 |
| - numeric dtype to be matched. However, if those floating point |
3188 |
| - numbers *are* strings, then you can do this. |
3189 |
| - * This method has *a lot* of options. You are encouraged to experiment |
3190 |
| - and play with this method to gain intuition about how it works. |
3191 |
| -
|
3192 |
| - Examples |
3193 |
| - -------- |
3194 |
| -
|
3195 |
| - >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], |
3196 |
| - ... 'B': [5, 6, 7, 8, 9], |
3197 |
| - ... 'C': ['a', 'b', 'c', 'd', 'e']}) |
3198 |
| - >>> df.replace(0, 5) |
3199 |
| - A B C |
3200 |
| - 0 5 5 a |
3201 |
| - 1 1 6 b |
3202 |
| - 2 2 7 c |
3203 |
| - 3 3 8 d |
3204 |
| - 4 4 9 e |
3205 |
| -
|
3206 |
| - >>> df.replace([0, 1, 2, 3], 4) |
3207 |
| - A B C |
3208 |
| - 0 4 5 a |
3209 |
| - 1 4 6 b |
3210 |
| - 2 4 7 c |
3211 |
| - 3 4 8 d |
3212 |
| - 4 4 9 e |
3213 |
| - >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) |
3214 |
| - A B C |
3215 |
| - 0 4 5 a |
3216 |
| - 1 3 6 b |
3217 |
| - 2 2 7 c |
3218 |
| - 3 1 8 d |
3219 |
| - 4 4 9 e |
3220 |
| -
|
3221 |
| - >>> df.replace({0: 10, 1: 100}) |
3222 |
| - A B C |
3223 |
| - 0 10 5 a |
3224 |
| - 1 100 6 b |
3225 |
| - 2 2 7 c |
3226 |
| - 3 3 8 d |
3227 |
| - 4 4 9 e |
3228 |
| - >>> df.replace({'A': 0, 'B': 5}, 100) |
3229 |
| - A B C |
3230 |
| - 0 100 100 a |
3231 |
| - 1 1 6 b |
3232 |
| - 2 2 7 c |
3233 |
| - 3 3 8 d |
3234 |
| - 4 4 9 e |
3235 |
| - >>> df.replace({'A': {0: 100, 4: 400}}) |
3236 |
| - A B C |
3237 |
| - 0 100 5 a |
3238 |
| - 1 1 6 b |
3239 |
| - 2 2 7 c |
3240 |
| - 3 3 8 d |
3241 |
| - 4 400 9 e |
3242 |
| -
|
3243 |
| - >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], |
3244 |
| - ... 'B': ['abc', 'bar', 'xyz']}) |
3245 |
| - >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) |
3246 |
| - A B |
3247 |
| - 0 new abc |
3248 |
| - 1 foo new |
3249 |
| - 2 bait xyz |
3250 |
| - >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) |
3251 |
| - A B |
3252 |
| - 0 new abc |
3253 |
| - 1 foo bar |
3254 |
| - 2 bait xyz |
3255 |
| - >>> df.replace(regex=r'^ba.$', value='new') |
3256 |
| - A B |
3257 |
| - 0 new abc |
3258 |
| - 1 foo new |
3259 |
| - 2 bait xyz |
3260 |
| - >>> df.replace(regex={r'^ba.$':'new', 'foo':'xyz'}) |
3261 |
| - A B |
3262 |
| - 0 new abc |
3263 |
| - 1 xyz new |
3264 |
| - 2 bait xyz |
3265 |
| - >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') |
3266 |
| - A B |
3267 |
| - 0 new abc |
3268 |
| - 1 new new |
3269 |
| - 2 bait xyz |
3270 |
| -
|
3271 |
| - Note that when replacing multiple ``bool`` or ``datetime64`` objects, |
3272 |
| - the data types in the ``to_replace`` parameter must match the data |
3273 |
| - type of the value being replaced: |
3274 |
| -
|
3275 |
| - >>> df = pd.DataFrame({'A': [True, False, True], |
3276 |
| - ... 'B': [False, True, False]}) |
3277 |
| - >>> df.replace({'a string': 'new value', True: False}) # raises |
3278 |
| - TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' |
3279 |
| -
|
3280 |
| - This raises a ``TypeError`` because one of the ``dict`` keys is not of |
3281 |
| - the correct type for replacement. |
3282 |
| -
|
3283 |
| - """ |
3284 | 3073 | return super(DataFrame, self).replace(to_replace=to_replace,
|
3285 | 3074 | value=value, inplace=inplace,
|
3286 | 3075 | limit=limit, regex=regex,
|
|
0 commit comments