From f1f11e3c250f63a056daaebf5d0d383e89907e14 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 4 Nov 2017 01:20:38 +0000 Subject: [PATCH 1/5] DOC: Improve replace docstring --- pandas/core/frame.py | 384 +++++++++++++++++++++++++++++++++++++++++ pandas/core/generic.py | 250 --------------------------- pandas/core/series.py | 357 +++++++++++++++++++++++++++++++++++++- 3 files changed, 740 insertions(+), 251 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 96d28581cfdd9..746e63f847e41 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -20,6 +20,7 @@ import warnings from textwrap import dedent +import pandas as pd import numpy as np import numpy.ma as ma @@ -42,6 +43,7 @@ is_datetimetz, is_datetime64_any_dtype, is_datetime64tz_dtype, + is_bool, is_bool_dtype, is_integer_dtype, is_float_dtype, @@ -236,6 +238,30 @@ """ +def _single_replace(self, to_replace, method, inplace, limit): + if self.ndim != 1: + raise TypeError('cannot replace {0} with method {1} on a {2}' + .format(to_replace, method, type(self).__name__)) + + orig_dtype = self.dtype + result = self if inplace else self.copy() + fill_f = missing.get_fill_func(method) + + mask = missing.mask_missing(result.values, to_replace) + values = fill_f(result.values, limit=limit, mask=mask) + + if values.dtype == orig_dtype and inplace: + return + + result = pd.Series(values, index=self.index, + dtype=self.dtype).__finalize__(self) + + if inplace: + self._update_inplace(result._data) + return + + return result + # ----------------------------------------------------------------------- # DataFrame class @@ -3080,6 +3106,364 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, limit=limit, downcast=downcast, **kwargs) + def replace(self, to_replace=None, value=None, inplace=False, limit=None, + regex=False, method='pad', axis=None): + """ + Replace values given in 'to_replace' with 'value'. 
+ + Parameters + ---------- + to_replace : str, regex, list, dict, Series, numeric, or None + + * numeric, str or regex: + + - numeric: numeric values equal to ``to_replace`` will be + replaced with ``value`` + - str: string exactly matching `to_replace` will be replaced + with ``value`` + - regex: regexs matching ``to_replace`` will be replaced with + ``value`` + + * list of str, regex, or numeric: + + - First, if ``to_replace`` and ``value`` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for ``value`` since there + are only a few possible substitution regexes you can use. + - str and regex rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the ``value`` + parameter should be ``None``. + - Alternatively, a dict can specify that different values + should be replaced in different columns. For example, + {'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and + the value 'z' in column 'b' and replaces these values with + whatever is specified in ``value``. The ``value`` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - Nested dictionaries, e.g., {'a': {'b': np.nan}}, are read as + follows: look in column 'a' for the value 'b' and replace it + with NaN. The ``value`` parameter should be ``None`` to use + a nested dict in this way. You can nest regular expressions + as well. Note that column names (the top-level dictionary + keys in a nested dictionary) **cannot** be regular + expressions. 
+ + * None: + + - This means that the ``regex`` argument must be a string, + compiled regular expression, or list, dict, ndarray or Series + of such elements. If ``value`` is also ``None`` then this + **must** be a nested dictionary or ``Series``. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching ``to_replace`` with. + Alternatively, a dict of values specifying which value to use for + each column (columns not in the dict will not be filled). Regular + expressions, strings and lists or dicts of such objects are also + allowed. + inplace : boolean, default False + If True, in place. Note: this will modify any + other views on this object (e.g. a column from a DataFrame). + Returns the caller if this is True. + limit : int, default None + Maximum size gap to forward or backward fill + regex : bool or same types as `to_replace`, default False + Whether to interpret ``to_replace`` and/or ``value`` as regular + expressions. If this is ``True`` then ``to_replace`` *must* be a + string. Alternatively, this could be a regular expression or a list, + dict, or array of regular expressions in which case ``to_replace`` + must be ``None``. + method : string, optional, {'pad', 'ffill', 'bfill'} + The method to use when for replacement, when ``to_replace`` is a + ``list``. + + See Also + -------- + :func:`DataFrame.fillna` : Fill NA/NaN values + :func:`DataFrame.where` : Replace values based on boolean condition + + Returns + ------- + filled : DataFrame + + Raises + ------ + AssertionError + * If ``regex`` is not a ``bool`` and ``to_replace`` is not ``None``. + TypeError + * If ``to_replace`` is a ``dict`` and `value` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If ``to_replace`` is ``None`` and ``regex`` is not compilable into a + regular expression or is a list, dict, ndarray, or Series. 
+ * When replacing multiple ``bool`` or ``datetime64`` objects and the + the arguments to `to_replace` does not match the type of the + value being replaced + ValueError + * If a ``list`` or an ``ndarray`` is passed to `to_replace` and + `value` but they are not the same length. + + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point numbers + *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + + Examples + -------- + + >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 'C': ['a', 'b', 'c', 'd', 'e']}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + >>> df.replace({0: 10, 1: 100}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + >>> df.replace({'A': 0, 'B': 5}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + >>> df.replace({'A': {0: 100, 4: 400}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], + ... 
'B': ['abc', 'bar', 'xyz']}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + >>> df.replace(regex={r'^ba.$':'new', 'foo':'xyz'}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Note that when replacing multiple ``bool`` or ``datetime64`` objects, + the data types in the ``to_replace`` parameter must match the data + type of the value being replaced: + + >>> df = pd.DataFrame({'A': [True, False, True], + ... 'B': [False, True, False]}) + >>> df.replace({'a string': 'new value', True: False}) # raises + TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' + + This raises a ``TypeError`` because one of the ``dict`` keys is not of + the correct type for replacement. 
+ + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + if not is_bool(regex) and to_replace is not None: + raise AssertionError("'to_replace' must be 'None' if 'regex' is " + "not a bool") + if axis is not None: + warnings.warn('the "axis" argument is deprecated ' + 'and will be removed in' + 'v0.13; this argument has no effect') + + self._consolidate_inplace() + + if value is None: + # passing a single value that is scalar like + # when value is None (GH5319), for compat + if not is_dict_like(to_replace) and not is_dict_like(regex): + to_replace = [to_replace] + + if isinstance(to_replace, (tuple, list)): + return _single_replace(self, to_replace, method, inplace, + limit) + + if not is_dict_like(to_replace): + if not is_dict_like(regex): + raise TypeError('If "to_replace" and "value" are both None' + ' and "to_replace" is not a list, then ' + 'regex must be a mapping') + to_replace = regex + regex = True + + items = list(compat.iteritems(to_replace)) + keys, values = lzip(*items) or ([], []) + + are_mappings = [is_dict_like(v) for v in values] + + if any(are_mappings): + if not all(are_mappings): + raise TypeError("If a nested mapping is passed, all values" + " of the top level mapping must be " + "mappings") + # passed a nested dict/Series + to_rep_dict = {} + value_dict = {} + + for k, v in items: + keys, values = lzip(*v.items()) or ([], []) + if set(keys) & set(values): + raise ValueError("Replacement not allowed with " + "overlapping keys and values") + to_rep_dict[k] = list(keys) + value_dict[k] = list(values) + + to_replace, value = to_rep_dict, value_dict + else: + to_replace, value = keys, values + + return self.replace(to_replace, value, inplace=inplace, + limit=limit, regex=regex) + else: + + # need a non-zero len on all axes + for a in self._AXIS_ORDERS: + if not len(self._get_axis(a)): + return self + + new_data = self._data + if is_dict_like(to_replace): + if is_dict_like(value): # {'A' : NA} -> {'A' : 0} + res = self if inplace else 
self.copy() + for c, src in compat.iteritems(to_replace): + if c in value and c in self: + # object conversion is handled in + # series.replace which is called recursivelly + res[c] = res[c].replace(to_replace=src, + value=value[c], + inplace=False, + regex=regex) + return None if inplace else res + + # {'A': NA} -> 0 + elif not is_list_like(value): + keys = [(k, src) for k, src in compat.iteritems(to_replace) + if k in self] + keys_len = len(keys) - 1 + for i, (k, src) in enumerate(keys): + convert = i == keys_len + new_data = new_data.replace(to_replace=src, + value=value, + filter=[k], + inplace=inplace, + regex=regex, + convert=convert) + else: + raise TypeError('value argument must be scalar, dict, or ' + 'Series') + + elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] + if is_list_like(value): + if len(to_replace) != len(value): + raise ValueError('Replacement lists must match ' + 'in length. Expecting %d got %d ' % + (len(to_replace), len(value))) + + new_data = self._data.replace_list(src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex) + + else: # [NA, ''] -> 0 + new_data = self._data.replace(to_replace=to_replace, + value=value, inplace=inplace, + regex=regex) + elif to_replace is None: + if not (is_re_compilable(regex) or + is_list_like(regex) or is_dict_like(regex)): + raise TypeError("'regex' must be a string or a compiled " + "regular expression or a list or dict of " + "strings or regular expressions, you " + "passed a" + " {0!r}".format(type(regex).__name__)) + return self.replace(regex, value, inplace=inplace, limit=limit, + regex=True) + else: + + # dest iterable dict-like + if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} + new_data = self._data + + for k, v in compat.iteritems(value): + if k in self: + new_data = new_data.replace(to_replace=to_replace, + value=v, filter=[k], + inplace=inplace, + regex=regex) + + elif not is_list_like(value): # NA -> 0 + new_data = self._data.replace(to_replace=to_replace, + 
value=value, inplace=inplace, + regex=regex) + else: + msg = ('Invalid "to_replace" type: ' + '{0!r}').format(type(to_replace).__name__) + raise TypeError(msg) # pragma: no cover + + if inplace: + self._update_inplace(new_data) + else: + return self._constructor(new_data).__finalize__(self) + @Appender(_shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): return super(DataFrame, self).shift(periods=periods, freq=freq, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d34a85b5b4388..02187773d7593 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4787,256 +4787,6 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None): return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) - def replace(self, to_replace=None, value=None, inplace=False, limit=None, - regex=False, method='pad', axis=None): - """ - Replace values given in 'to_replace' with 'value'. - - Parameters - ---------- - to_replace : str, regex, list, dict, Series, numeric, or None - - * str or regex: - - - str: string exactly matching `to_replace` will be replaced - with `value` - - regex: regexs matching `to_replace` will be replaced with - `value` - - * list of str, regex, or numeric: - - - First, if `to_replace` and `value` are both lists, they - **must** be the same length. - - Second, if ``regex=True`` then all of the strings in **both** - lists will be interpreted as regexs otherwise they will match - directly. This doesn't matter much for `value` since there - are only a few possible substitution regexes you can use. - - str and regex rules apply as above. - - * dict: - - - Nested dictionaries, e.g., {'a': {'b': nan}}, are read as - follows: look in column 'a' for the value 'b' and replace it - with nan. You can nest regular expressions as well. Note that - column names (the top-level dictionary keys in a nested - dictionary) **cannot** be regular expressions. 
- - Keys map to column names and values map to substitution - values. You can treat this as a special case of passing two - lists except that you are specifying the column to search in. - - * None: - - - This means that the ``regex`` argument must be a string, - compiled regular expression, or list, dict, ndarray or Series - of such elements. If `value` is also ``None`` then this - **must** be a nested dictionary or ``Series``. - - See the examples section for examples of each of these. - value : scalar, dict, list, str, regex, default None - Value to use to fill holes (e.g. 0), alternately a dict of values - specifying which value to use for each column (columns not in the - dict will not be filled). Regular expressions, strings and lists or - dicts of such objects are also allowed. - inplace : boolean, default False - If True, in place. Note: this will modify any - other views on this object (e.g. a column from a DataFrame). - Returns the caller if this is True. - limit : int, default None - Maximum size gap to forward or backward fill - regex : bool or same types as `to_replace`, default False - Whether to interpret `to_replace` and/or `value` as regular - expressions. If this is ``True`` then `to_replace` *must* be a - string. Otherwise, `to_replace` must be ``None`` because this - parameter will be interpreted as a regular expression or a list, - dict, or array of regular expressions. - method : string, optional, {'pad', 'ffill', 'bfill'} - The method to use when for replacement, when ``to_replace`` is a - ``list``. - - See Also - -------- - NDFrame.reindex - NDFrame.asfreq - NDFrame.fillna - - Returns - ------- - filled : NDFrame - - Raises - ------ - AssertionError - * If `regex` is not a ``bool`` and `to_replace` is not ``None``. 
- TypeError - * If `to_replace` is a ``dict`` and `value` is not a ``list``, - ``dict``, ``ndarray``, or ``Series`` - * If `to_replace` is ``None`` and `regex` is not compilable into a - regular expression or is a list, dict, ndarray, or Series. - ValueError - * If `to_replace` and `value` are ``list`` s or ``ndarray`` s, but - they are not the same length. - - Notes - ----- - * Regex substitution is performed under the hood with ``re.sub``. The - rules for substitution for ``re.sub`` are the same. - * Regular expressions will only substitute on strings, meaning you - cannot provide, for example, a regular expression matching floating - point numbers and expect the columns in your frame that have a - numeric dtype to be matched. However, if those floating point numbers - *are* strings, then you can do this. - * This method has *a lot* of options. You are encouraged to experiment - and play with this method to gain intuition about how it works. - - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - if not is_bool(regex) and to_replace is not None: - raise AssertionError("'to_replace' must be 'None' if 'regex' is " - "not a bool") - if axis is not None: - warnings.warn('the "axis" argument is deprecated ' - 'and will be removed in' - 'v0.13; this argument has no effect') - - self._consolidate_inplace() - - if value is None: - # passing a single value that is scalar like - # when value is None (GH5319), for compat - if not is_dict_like(to_replace) and not is_dict_like(regex): - to_replace = [to_replace] - - if isinstance(to_replace, (tuple, list)): - return _single_replace(self, to_replace, method, inplace, - limit) - - if not is_dict_like(to_replace): - if not is_dict_like(regex): - raise TypeError('If "to_replace" and "value" are both None' - ' and "to_replace" is not a list, then ' - 'regex must be a mapping') - to_replace = regex - regex = True - - items = list(compat.iteritems(to_replace)) - keys, values = lzip(*items) or ([], []) - - are_mappings = 
[is_dict_like(v) for v in values] - - if any(are_mappings): - if not all(are_mappings): - raise TypeError("If a nested mapping is passed, all values" - " of the top level mapping must be " - "mappings") - # passed a nested dict/Series - to_rep_dict = {} - value_dict = {} - - for k, v in items: - keys, values = lzip(*v.items()) or ([], []) - if set(keys) & set(values): - raise ValueError("Replacement not allowed with " - "overlapping keys and values") - to_rep_dict[k] = list(keys) - value_dict[k] = list(values) - - to_replace, value = to_rep_dict, value_dict - else: - to_replace, value = keys, values - - return self.replace(to_replace, value, inplace=inplace, - limit=limit, regex=regex) - else: - - # need a non-zero len on all axes - for a in self._AXIS_ORDERS: - if not len(self._get_axis(a)): - return self - - new_data = self._data - if is_dict_like(to_replace): - if is_dict_like(value): # {'A' : NA} -> {'A' : 0} - res = self if inplace else self.copy() - for c, src in compat.iteritems(to_replace): - if c in value and c in self: - # object conversion is handled in - # series.replace which is called recursivelly - res[c] = res[c].replace(to_replace=src, - value=value[c], - inplace=False, - regex=regex) - return None if inplace else res - - # {'A': NA} -> 0 - elif not is_list_like(value): - keys = [(k, src) for k, src in compat.iteritems(to_replace) - if k in self] - keys_len = len(keys) - 1 - for i, (k, src) in enumerate(keys): - convert = i == keys_len - new_data = new_data.replace(to_replace=src, - value=value, - filter=[k], - inplace=inplace, - regex=regex, - convert=convert) - else: - raise TypeError('value argument must be scalar, dict, or ' - 'Series') - - elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] - if is_list_like(value): - if len(to_replace) != len(value): - raise ValueError('Replacement lists must match ' - 'in length. 
Expecting %d got %d ' % - (len(to_replace), len(value))) - - new_data = self._data.replace_list(src_list=to_replace, - dest_list=value, - inplace=inplace, - regex=regex) - - else: # [NA, ''] -> 0 - new_data = self._data.replace(to_replace=to_replace, - value=value, inplace=inplace, - regex=regex) - elif to_replace is None: - if not (is_re_compilable(regex) or - is_list_like(regex) or is_dict_like(regex)): - raise TypeError("'regex' must be a string or a compiled " - "regular expression or a list or dict of " - "strings or regular expressions, you " - "passed a" - " {0!r}".format(type(regex).__name__)) - return self.replace(regex, value, inplace=inplace, limit=limit, - regex=True) - else: - - # dest iterable dict-like - if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} - new_data = self._data - - for k, v in compat.iteritems(value): - if k in self: - new_data = new_data.replace(to_replace=to_replace, - value=v, filter=[k], - inplace=inplace, - regex=regex) - - elif not is_list_like(value): # NA -> 0 - new_data = self._data.replace(to_replace=to_replace, - value=value, inplace=inplace, - regex=regex) - else: - msg = ('Invalid "to_replace" type: ' - '{0!r}').format(type(to_replace).__name__) - raise TypeError(msg) # pragma: no cover - - if inplace: - self._update_inplace(new_data) - else: - return self._constructor(new_data).__finalize__(self) - _shared_docs['interpolate'] = """ Please note that only ``method='linear'`` is supported for DataFrames/Series with a MultiIndex. 
diff --git a/pandas/core/series.py b/pandas/core/series.py index 78b4c3a70a519..5bf620e103f28 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -10,6 +10,7 @@ import warnings from textwrap import dedent +import pandas as pd import numpy as np import numpy.ma as ma @@ -26,6 +27,7 @@ is_hashable, is_iterator, is_dict_like, + is_re_compilable, is_scalar, _is_unorderable_exception, _ensure_platform_int, @@ -38,6 +40,7 @@ maybe_cast_to_datetime, maybe_castable, construct_1d_arraylike_from_scalar) from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike +import pandas.core.missing as missing from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) @@ -52,7 +55,7 @@ from pandas import compat from pandas.io.formats.terminal import get_terminal_size from pandas.compat import ( - zip, u, OrderedDict, StringIO, range, get_range_parameters) + zip, lzip, u, OrderedDict, StringIO, range, get_range_parameters) from pandas.compat.numpy import function as nv import pandas.core.ops as ops @@ -106,6 +109,30 @@ def wrapper(self): return wrapper +def _single_replace(self, to_replace, method, inplace, limit): + if self.ndim != 1: + raise TypeError('cannot replace {0} with method {1} on a {2}' + .format(to_replace, method, type(self).__name__)) + + orig_dtype = self.dtype + result = self if inplace else self.copy() + fill_f = missing.get_fill_func(method) + + mask = missing.mask_missing(result.values, to_replace) + values = fill_f(result.values, limit=limit, mask=mask) + + if values.dtype == orig_dtype and inplace: + return + + result = pd.Series(values, index=self.index, + dtype=self.dtype).__finalize__(self) + + if inplace: + self._update_inplace(result._data) + return + + return result + # ---------------------------------------------------------------------- # Series class @@ -2671,6 +2698,334 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, limit=limit, downcast=downcast, **kwargs) + def 
replace(self, to_replace=None, value=None, inplace=False, limit=None, + regex=False, method='pad', axis=None): + """ + Replace values given in 'to_replace' with 'value'. + + Parameters + ---------- + to_replace : str, regex, list, dict, Series, numeric, or None + + * numeric, str or regex: + - numeric: numeric values equal to ``to_replace`` will be + replaced with ``value`` + - str: string exactly matching ``to_replace`` will be replaced + with ``value`` + - regex: regexs matching ``to_replace`` will be replaced with + ``value`` + + * list of str, regex, or numeric: + + - First, if ``to_replace`` and ``value`` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for ``value`` since there + are only a few possible substitution regexes you can use. + - str and regex rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the ``value`` + parameter should be ``None``. + + * None: + + - This means that the ``regex`` argument must be a string, + compiled regular expression, or list, dict, ndarray or Series + of such elements. If ``value`` is also ``None`` then this + **must** be a dictionary or ``Series``. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching ``to_replace`` with. Regular + expressions, strings and lists or dicts of such objects are also + allowed. + inplace : boolean, default False + If True, in place. Note: this will modify any + other views on this object (e.g. a column from a DataFrame). + Returns the caller if this is True. 
+ limit : int, default None + Maximum size gap to forward or backward fill + regex : bool or same types as ``to_replace``, default False + Whether to interpret ``to_replace`` and/or ``value`` as regular + expressions. If this is ``True`` then ``to_replace`` *must* be a + string. Alternatively, this could be a regular expression or a list, + dict, or array of regular expressions in which case ``to_replace`` + must be ``None``. + method : string, optional, {'pad', 'ffill', 'bfill'} + The method to use when for replacement, when ``to_replace`` is a + ``list``. + + See Also + -------- + :func:`Series.fillna` : Fill NA/NaN values + :func:`Series.where` : Replace values based on boolean condition + + Returns + ------- + filled : Series + + Raises + ------ + AssertionError + * If ``regex`` is not a ``bool`` and ``to_replace`` is not ``None``. + TypeError + * If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If ``to_replace`` is ``None`` and ``regex`` is not compilable into a + regular expression or is a list, dict, ndarray, or Series. + * When replacing multiple ``bool`` or ``datetime64`` objects and the + the arguments to ``to_replace`` does not match the type of the + value being replaced + ValueError + * If a ``list`` or an ``ndarray`` is passed to ``to_replace`` and + ``value`` but they are not the same length. + + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point numbers + *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. 
+ + Examples + -------- + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.replace(0, 5) + 0 5 + 1 1 + 2 2 + 3 3 + 4 4 + dtype: int64 + + >>> s.replace([0, 1, 2], 300) + 0 300 + 1 300 + 2 300 + 3 3 + 4 4 + dtype: int64 + >>> s.replace([0, 1, 2], [100, 200, 300]) + 0 100 + 1 200 + 2 300 + 3 3 + 4 4 + dtype: int64 + >>> s.replace([1, 2], method='bfill') + 0 0 + 1 3 + 2 3 + 3 3 + 4 4 + dtype: int64 + + >>> s.replace({0: 10, 1: 100}) + 0 10 + 1 100 + 2 2 + 3 3 + 4 4 + dtype: int64 + + >>> s = pd.Series(['bat', 'foo', 'bar', 'bait']) + >>> s.replace(to_replace=r'^ba.$', value='new', regex=True) + 0 new + 1 foo + 2 new + 3 bait + dtype: object + >>> s.replace(regex=r'^ba.$', value='new') + 0 new + 1 foo + 2 new + 3 bait + dtype: object + >>> s.replace(regex={r'^ba.$':'new', 'foo':'xyz'}) + 0 new + 1 xyz + 2 new + 3 bait + dtype: object + >>> s.replace(regex=[r'^ba.$', 'foo'], value='new') + 0 new + 1 new + 2 new + 3 bait + dtype: object + + Note that when replacing multiple ``bool`` or ``datetime64`` objects, + the data types in the ``to_replace`` parameter must match the data + type of the value being replaced: + + >>> s = pd.Series([True, False, True]) + >>> s.replace({'a string': 'new value', True: False}) # raises + TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' + + This raises a ``TypeError`` because one of the ``dict`` keys is not of + the correct type for replacement. 
+ + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + if not is_bool(regex) and to_replace is not None: + raise AssertionError("'to_replace' must be 'None' if 'regex' is " + "not a bool") + if axis is not None: + warnings.warn('the "axis" argument is deprecated ' + 'and will be removed in' + 'v0.13; this argument has no effect') + + self._consolidate_inplace() + + if value is None: + # passing a single value that is scalar like + # when value is None (GH5319), for compat + if not is_dict_like(to_replace) and not is_dict_like(regex): + to_replace = [to_replace] + + if isinstance(to_replace, (tuple, list)): + return _single_replace(self, to_replace, method, inplace, + limit) + + if not is_dict_like(to_replace): + if not is_dict_like(regex): + raise TypeError('If "to_replace" and "value" are both None' + ' and "to_replace" is not a list, then ' + 'regex must be a mapping') + to_replace = regex + regex = True + + items = list(compat.iteritems(to_replace)) + keys, values = lzip(*items) or ([], []) + + are_mappings = [is_dict_like(v) for v in values] + + if any(are_mappings): + if not all(are_mappings): + raise TypeError("If a nested mapping is passed, all values" + " of the top level mapping must be " + "mappings") + # passed a nested dict/Series + to_rep_dict = {} + value_dict = {} + + for k, v in items: + keys, values = lzip(*v.items()) or ([], []) + if set(keys) & set(values): + raise ValueError("Replacement not allowed with " + "overlapping keys and values") + to_rep_dict[k] = list(keys) + value_dict[k] = list(values) + + to_replace, value = to_rep_dict, value_dict + else: + to_replace, value = keys, values + + return self.replace(to_replace, value, inplace=inplace, + limit=limit, regex=regex) + else: + + # need a non-zero len on all axes + for a in self._AXIS_ORDERS: + if not len(self._get_axis(a)): + return self + + new_data = self._data + if is_dict_like(to_replace): + if is_dict_like(value): # {'A' : NA} -> {'A' : 0} + res = self if inplace else 
self.copy() + for c, src in compat.iteritems(to_replace): + if c in value and c in self: + # object conversion is handled in + # series.replace which is called recursivelly + res[c] = res[c].replace(to_replace=src, + value=value[c], + inplace=False, + regex=regex) + return None if inplace else res + + # {'A': NA} -> 0 + elif not is_list_like(value): + keys = [(k, src) for k, src in compat.iteritems(to_replace) + if k in self] + keys_len = len(keys) - 1 + for i, (k, src) in enumerate(keys): + convert = i == keys_len + new_data = new_data.replace(to_replace=src, + value=value, + filter=[k], + inplace=inplace, + regex=regex, + convert=convert) + else: + raise TypeError('value argument must be scalar, dict, or ' + 'Series') + + elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] + if is_list_like(value): + if len(to_replace) != len(value): + raise ValueError('Replacement lists must match ' + 'in length. Expecting %d got %d ' % + (len(to_replace), len(value))) + + new_data = self._data.replace_list(src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex) + + else: # [NA, ''] -> 0 + new_data = self._data.replace(to_replace=to_replace, + value=value, inplace=inplace, + regex=regex) + elif to_replace is None: + if not (is_re_compilable(regex) or + is_list_like(regex) or is_dict_like(regex)): + raise TypeError("'regex' must be a string or a compiled " + "regular expression or a list or dict of " + "strings or regular expressions, you " + "passed a" + " {0!r}".format(type(regex).__name__)) + return self.replace(regex, value, inplace=inplace, limit=limit, + regex=True) + else: + + # dest iterable dict-like + if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} + new_data = self._data + + for k, v in compat.iteritems(value): + if k in self: + new_data = new_data.replace(to_replace=to_replace, + value=v, filter=[k], + inplace=inplace, + regex=regex) + + elif not is_list_like(value): # NA -> 0 + new_data = self._data.replace(to_replace=to_replace, + 
value=value, inplace=inplace, + regex=regex) + else: + msg = ('Invalid "to_replace" type: ' + '{0!r}').format(type(to_replace).__name__) + raise TypeError(msg) # pragma: no cover + + if inplace: + self._update_inplace(new_data) + else: + return self._constructor(new_data).__finalize__(self) + @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): return super(Series, self).shift(periods=periods, freq=freq, axis=axis) From a1db92cc8a62e607f60acf45bcf17ee2dcc771e0 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 4 Nov 2017 20:48:10 +0000 Subject: [PATCH 2/5] Move replace back to generic.py --- pandas/core/frame.py | 178 ++--------------------------------------- pandas/core/generic.py | 157 ++++++++++++++++++++++++++++++++++++ pandas/core/series.py | 178 ++--------------------------------------- 3 files changed, 167 insertions(+), 346 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 746e63f847e41..79224a6220d2e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -238,30 +238,6 @@ """ -def _single_replace(self, to_replace, method, inplace, limit): - if self.ndim != 1: - raise TypeError('cannot replace {0} with method {1} on a {2}' - .format(to_replace, method, type(self).__name__)) - - orig_dtype = self.dtype - result = self if inplace else self.copy() - fill_f = missing.get_fill_func(method) - - mask = missing.mask_missing(result.values, to_replace) - values = fill_f(result.values, limit=limit, mask=mask) - - if values.dtype == orig_dtype and inplace: - return - - result = pd.Series(values, index=self.index, - dtype=self.dtype).__finalize__(self) - - if inplace: - self._update_inplace(result._data) - return - - return result - # ----------------------------------------------------------------------- # DataFrame class @@ -3189,8 +3165,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, See Also -------- - :func:`DataFrame.fillna` : Fill NA/NaN 
values - :func:`DataFrame.where` : Replace values based on boolean condition + DataFrame.fillna : Fill NA/NaN values + DataFrame.where : Replace values based on boolean condition Returns ------- @@ -3316,153 +3292,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, the correct type for replacement. """ - inplace = validate_bool_kwarg(inplace, 'inplace') - if not is_bool(regex) and to_replace is not None: - raise AssertionError("'to_replace' must be 'None' if 'regex' is " - "not a bool") - if axis is not None: - warnings.warn('the "axis" argument is deprecated ' - 'and will be removed in' - 'v0.13; this argument has no effect') - - self._consolidate_inplace() - - if value is None: - # passing a single value that is scalar like - # when value is None (GH5319), for compat - if not is_dict_like(to_replace) and not is_dict_like(regex): - to_replace = [to_replace] - - if isinstance(to_replace, (tuple, list)): - return _single_replace(self, to_replace, method, inplace, - limit) - - if not is_dict_like(to_replace): - if not is_dict_like(regex): - raise TypeError('If "to_replace" and "value" are both None' - ' and "to_replace" is not a list, then ' - 'regex must be a mapping') - to_replace = regex - regex = True - - items = list(compat.iteritems(to_replace)) - keys, values = lzip(*items) or ([], []) - - are_mappings = [is_dict_like(v) for v in values] - - if any(are_mappings): - if not all(are_mappings): - raise TypeError("If a nested mapping is passed, all values" - " of the top level mapping must be " - "mappings") - # passed a nested dict/Series - to_rep_dict = {} - value_dict = {} - - for k, v in items: - keys, values = lzip(*v.items()) or ([], []) - if set(keys) & set(values): - raise ValueError("Replacement not allowed with " - "overlapping keys and values") - to_rep_dict[k] = list(keys) - value_dict[k] = list(values) - - to_replace, value = to_rep_dict, value_dict - else: - to_replace, value = keys, values - - return 
self.replace(to_replace, value, inplace=inplace, - limit=limit, regex=regex) - else: - - # need a non-zero len on all axes - for a in self._AXIS_ORDERS: - if not len(self._get_axis(a)): - return self - - new_data = self._data - if is_dict_like(to_replace): - if is_dict_like(value): # {'A' : NA} -> {'A' : 0} - res = self if inplace else self.copy() - for c, src in compat.iteritems(to_replace): - if c in value and c in self: - # object conversion is handled in - # series.replace which is called recursivelly - res[c] = res[c].replace(to_replace=src, - value=value[c], - inplace=False, - regex=regex) - return None if inplace else res - - # {'A': NA} -> 0 - elif not is_list_like(value): - keys = [(k, src) for k, src in compat.iteritems(to_replace) - if k in self] - keys_len = len(keys) - 1 - for i, (k, src) in enumerate(keys): - convert = i == keys_len - new_data = new_data.replace(to_replace=src, - value=value, - filter=[k], - inplace=inplace, - regex=regex, - convert=convert) - else: - raise TypeError('value argument must be scalar, dict, or ' - 'Series') - - elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] - if is_list_like(value): - if len(to_replace) != len(value): - raise ValueError('Replacement lists must match ' - 'in length. 
Expecting %d got %d ' % - (len(to_replace), len(value))) - - new_data = self._data.replace_list(src_list=to_replace, - dest_list=value, - inplace=inplace, - regex=regex) - - else: # [NA, ''] -> 0 - new_data = self._data.replace(to_replace=to_replace, - value=value, inplace=inplace, - regex=regex) - elif to_replace is None: - if not (is_re_compilable(regex) or - is_list_like(regex) or is_dict_like(regex)): - raise TypeError("'regex' must be a string or a compiled " - "regular expression or a list or dict of " - "strings or regular expressions, you " - "passed a" - " {0!r}".format(type(regex).__name__)) - return self.replace(regex, value, inplace=inplace, limit=limit, - regex=True) - else: - - # dest iterable dict-like - if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} - new_data = self._data - - for k, v in compat.iteritems(value): - if k in self: - new_data = new_data.replace(to_replace=to_replace, - value=v, filter=[k], - inplace=inplace, - regex=regex) - - elif not is_list_like(value): # NA -> 0 - new_data = self._data.replace(to_replace=to_replace, - value=value, inplace=inplace, - regex=regex) - else: - msg = ('Invalid "to_replace" type: ' - '{0!r}').format(type(to_replace).__name__) - raise TypeError(msg) # pragma: no cover - - if inplace: - self._update_inplace(new_data) - else: - return self._constructor(new_data).__finalize__(self) + return super(DataFrame, self).replace(to_replace=to_replace, + value=value, inplace=inplace, limit=limit, regex=regex, + method=method, axis=axis) @Appender(_shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 02187773d7593..82c257c927814 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -69,6 +69,10 @@ def _single_replace(self, to_replace, method, inplace, limit): + """ + Replaces values in a Series using the fill method specified when no + replacement value is given in the replace method + """ if 
self.ndim != 1: raise TypeError('cannot replace {0} with method {1} on a {2}' .format(to_replace, method, type(self).__name__)) @@ -4787,6 +4791,159 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None): return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) + def replace(self, to_replace=None, value=None, inplace=False, limit=None, + regex=False, method='pad', axis=None): + """ + Replace values given in 'to_replace' with 'value' + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + if not is_bool(regex) and to_replace is not None: + raise AssertionError("'to_replace' must be 'None' if 'regex' is " + "not a bool") + if axis is not None: + warnings.warn('the "axis" argument is deprecated ' + 'and will be removed in' + 'v0.13; this argument has no effect') + + self._consolidate_inplace() + + if value is None: + # passing a single value that is scalar like + # when value is None (GH5319), for compat + if not is_dict_like(to_replace) and not is_dict_like(regex): + to_replace = [to_replace] + + if isinstance(to_replace, (tuple, list)): + return _single_replace(self, to_replace, method, inplace, + limit) + + if not is_dict_like(to_replace): + if not is_dict_like(regex): + raise TypeError('If "to_replace" and "value" are both None' + ' and "to_replace" is not a list, then ' + 'regex must be a mapping') + to_replace = regex + regex = True + + items = list(compat.iteritems(to_replace)) + keys, values = lzip(*items) or ([], []) + + are_mappings = [is_dict_like(v) for v in values] + + if any(are_mappings): + if not all(are_mappings): + raise TypeError("If a nested mapping is passed, all values" + " of the top level mapping must be " + "mappings") + # passed a nested dict/Series + to_rep_dict = {} + value_dict = {} + + for k, v in items: + keys, values = lzip(*v.items()) or ([], []) + if set(keys) & set(values): + raise ValueError("Replacement not allowed with " + "overlapping keys and values") + to_rep_dict[k] = 
list(keys) + value_dict[k] = list(values) + + to_replace, value = to_rep_dict, value_dict + else: + to_replace, value = keys, values + + return self.replace(to_replace, value, inplace=inplace, + limit=limit, regex=regex) + else: + + # need a non-zero len on all axes + for a in self._AXIS_ORDERS: + if not len(self._get_axis(a)): + return self + + new_data = self._data + if is_dict_like(to_replace): + if is_dict_like(value): # {'A' : NA} -> {'A' : 0} + res = self if inplace else self.copy() + for c, src in compat.iteritems(to_replace): + if c in value and c in self: + # object conversion is handled in + # series.replace which is called recursivelly + res[c] = res[c].replace(to_replace=src, + value=value[c], + inplace=False, + regex=regex) + return None if inplace else res + + # {'A': NA} -> 0 + elif not is_list_like(value): + keys = [(k, src) for k, src in compat.iteritems(to_replace) + if k in self] + keys_len = len(keys) - 1 + for i, (k, src) in enumerate(keys): + convert = i == keys_len + new_data = new_data.replace(to_replace=src, + value=value, + filter=[k], + inplace=inplace, + regex=regex, + convert=convert) + else: + raise TypeError('value argument must be scalar, dict, or ' + 'Series') + + elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] + if is_list_like(value): + if len(to_replace) != len(value): + raise ValueError('Replacement lists must match ' + 'in length. 
Expecting %d got %d ' % + (len(to_replace), len(value))) + + new_data = self._data.replace_list(src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex) + + else: # [NA, ''] -> 0 + new_data = self._data.replace(to_replace=to_replace, + value=value, inplace=inplace, + regex=regex) + elif to_replace is None: + if not (is_re_compilable(regex) or + is_list_like(regex) or is_dict_like(regex)): + raise TypeError("'regex' must be a string or a compiled " + "regular expression or a list or dict of " + "strings or regular expressions, you " + "passed a" + " {0!r}".format(type(regex).__name__)) + return self.replace(regex, value, inplace=inplace, limit=limit, + regex=True) + else: + + # dest iterable dict-like + if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} + new_data = self._data + + for k, v in compat.iteritems(value): + if k in self: + new_data = new_data.replace(to_replace=to_replace, + value=v, filter=[k], + inplace=inplace, + regex=regex) + + elif not is_list_like(value): # NA -> 0 + new_data = self._data.replace(to_replace=to_replace, + value=value, inplace=inplace, + regex=regex) + else: + msg = ('Invalid "to_replace" type: ' + '{0!r}').format(type(to_replace).__name__) + raise TypeError(msg) # pragma: no cover + + if inplace: + self._update_inplace(new_data) + else: + return self._constructor(new_data).__finalize__(self) + _shared_docs['interpolate'] = """ Please note that only ``method='linear'`` is supported for DataFrames/Series with a MultiIndex. 
diff --git a/pandas/core/series.py b/pandas/core/series.py index 5bf620e103f28..d895898289c8c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -109,30 +109,6 @@ def wrapper(self): return wrapper -def _single_replace(self, to_replace, method, inplace, limit): - if self.ndim != 1: - raise TypeError('cannot replace {0} with method {1} on a {2}' - .format(to_replace, method, type(self).__name__)) - - orig_dtype = self.dtype - result = self if inplace else self.copy() - fill_f = missing.get_fill_func(method) - - mask = missing.mask_missing(result.values, to_replace) - values = fill_f(result.values, limit=limit, mask=mask) - - if values.dtype == orig_dtype and inplace: - return - - result = pd.Series(values, index=self.index, - dtype=self.dtype).__finalize__(self) - - if inplace: - self._update_inplace(result._data) - return - - return result - # ---------------------------------------------------------------------- # Series class @@ -2763,8 +2739,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, See Also -------- - :func:`Series.fillna` : Fill NA/NaN values - :func:`Series.where` : Replace values based on boolean condition + Series.fillna : Fill NA/NaN values + Series.where : Replace values based on boolean condition Returns ------- @@ -2878,153 +2854,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, the correct type for replacement. 
""" - inplace = validate_bool_kwarg(inplace, 'inplace') - if not is_bool(regex) and to_replace is not None: - raise AssertionError("'to_replace' must be 'None' if 'regex' is " - "not a bool") - if axis is not None: - warnings.warn('the "axis" argument is deprecated ' - 'and will be removed in' - 'v0.13; this argument has no effect') - - self._consolidate_inplace() - - if value is None: - # passing a single value that is scalar like - # when value is None (GH5319), for compat - if not is_dict_like(to_replace) and not is_dict_like(regex): - to_replace = [to_replace] - - if isinstance(to_replace, (tuple, list)): - return _single_replace(self, to_replace, method, inplace, - limit) - - if not is_dict_like(to_replace): - if not is_dict_like(regex): - raise TypeError('If "to_replace" and "value" are both None' - ' and "to_replace" is not a list, then ' - 'regex must be a mapping') - to_replace = regex - regex = True - - items = list(compat.iteritems(to_replace)) - keys, values = lzip(*items) or ([], []) - - are_mappings = [is_dict_like(v) for v in values] - - if any(are_mappings): - if not all(are_mappings): - raise TypeError("If a nested mapping is passed, all values" - " of the top level mapping must be " - "mappings") - # passed a nested dict/Series - to_rep_dict = {} - value_dict = {} - - for k, v in items: - keys, values = lzip(*v.items()) or ([], []) - if set(keys) & set(values): - raise ValueError("Replacement not allowed with " - "overlapping keys and values") - to_rep_dict[k] = list(keys) - value_dict[k] = list(values) - - to_replace, value = to_rep_dict, value_dict - else: - to_replace, value = keys, values - - return self.replace(to_replace, value, inplace=inplace, - limit=limit, regex=regex) - else: - - # need a non-zero len on all axes - for a in self._AXIS_ORDERS: - if not len(self._get_axis(a)): - return self - - new_data = self._data - if is_dict_like(to_replace): - if is_dict_like(value): # {'A' : NA} -> {'A' : 0} - res = self if inplace else self.copy() 
- for c, src in compat.iteritems(to_replace): - if c in value and c in self: - # object conversion is handled in - # series.replace which is called recursivelly - res[c] = res[c].replace(to_replace=src, - value=value[c], - inplace=False, - regex=regex) - return None if inplace else res - - # {'A': NA} -> 0 - elif not is_list_like(value): - keys = [(k, src) for k, src in compat.iteritems(to_replace) - if k in self] - keys_len = len(keys) - 1 - for i, (k, src) in enumerate(keys): - convert = i == keys_len - new_data = new_data.replace(to_replace=src, - value=value, - filter=[k], - inplace=inplace, - regex=regex, - convert=convert) - else: - raise TypeError('value argument must be scalar, dict, or ' - 'Series') - - elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] - if is_list_like(value): - if len(to_replace) != len(value): - raise ValueError('Replacement lists must match ' - 'in length. Expecting %d got %d ' % - (len(to_replace), len(value))) - - new_data = self._data.replace_list(src_list=to_replace, - dest_list=value, - inplace=inplace, - regex=regex) - - else: # [NA, ''] -> 0 - new_data = self._data.replace(to_replace=to_replace, - value=value, inplace=inplace, - regex=regex) - elif to_replace is None: - if not (is_re_compilable(regex) or - is_list_like(regex) or is_dict_like(regex)): - raise TypeError("'regex' must be a string or a compiled " - "regular expression or a list or dict of " - "strings or regular expressions, you " - "passed a" - " {0!r}".format(type(regex).__name__)) - return self.replace(regex, value, inplace=inplace, limit=limit, - regex=True) - else: - - # dest iterable dict-like - if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} - new_data = self._data - - for k, v in compat.iteritems(value): - if k in self: - new_data = new_data.replace(to_replace=to_replace, - value=v, filter=[k], - inplace=inplace, - regex=regex) - - elif not is_list_like(value): # NA -> 0 - new_data = self._data.replace(to_replace=to_replace, - value=value, 
inplace=inplace, - regex=regex) - else: - msg = ('Invalid "to_replace" type: ' - '{0!r}').format(type(to_replace).__name__) - raise TypeError(msg) # pragma: no cover - - if inplace: - self._update_inplace(new_data) - else: - return self._constructor(new_data).__finalize__(self) + return super(Series, self).replace(to_replace=to_replace, value=value, + inplace=inplace, limit=limit, + regex=regex, method=method, axis=axis) @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): From d72cd704f1b7a89810116334a1b51c2101264c52 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Thu, 16 Nov 2017 22:32:47 +0000 Subject: [PATCH 3/5] fix some lint issues --- pandas/core/frame.py | 25 ++++++++++++++----------- pandas/core/series.py | 19 +++++++++++-------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79224a6220d2e..c5585884a76b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3156,9 +3156,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, regex : bool or same types as `to_replace`, default False Whether to interpret ``to_replace`` and/or ``value`` as regular expressions. If this is ``True`` then ``to_replace`` *must* be a - string. Alternatively, this could be a regular expression or a list, - dict, or array of regular expressions in which case ``to_replace`` - must be ``None``. + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + ``to_replace`` must be ``None``. method : string, optional, {'pad', 'ffill', 'bfill'} The method to use when for replacement, when ``to_replace`` is a ``list``. @@ -3175,13 +3175,15 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Raises ------ AssertionError - * If ``regex`` is not a ``bool`` and ``to_replace`` is not ``None``. 
+ * If ``regex`` is not a ``bool`` and ``to_replace`` is not + ``None``. TypeError * If ``to_replace`` is a ``dict`` and `value` is not a ``list``, ``dict``, ``ndarray``, or ``Series`` - * If ``to_replace`` is ``None`` and ``regex`` is not compilable into a - regular expression or is a list, dict, ndarray, or Series. - * When replacing multiple ``bool`` or ``datetime64`` objects and the + * If ``to_replace`` is ``None`` and ``regex`` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. + * When replacing multiple ``bool`` or ``datetime64`` objects and the arguments to `to_replace` does not match the type of the value being replaced ValueError @@ -3195,8 +3197,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, * Regular expressions will only substitute on strings, meaning you cannot provide, for example, a regular expression matching floating point numbers and expect the columns in your frame that have a - numeric dtype to be matched. However, if those floating point numbers - *are* strings, then you can do this. + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. * This method has *a lot* of options. You are encouraged to experiment and play with this method to gain intuition about how it works. 
@@ -3293,8 +3295,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, """ return super(DataFrame, self).replace(to_replace=to_replace, - value=value, inplace=inplace, limit=limit, regex=regex, - method=method, axis=axis) + value=value, inplace=inplace, + limit=limit, regex=regex, + method=method, axis=axis) @Appender(_shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): diff --git a/pandas/core/series.py b/pandas/core/series.py index d895898289c8c..594fd4863ee0d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2730,9 +2730,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, regex : bool or same types as ``to_replace``, default False Whether to interpret ``to_replace`` and/or ``value`` as regular expressions. If this is ``True`` then ``to_replace`` *must* be a - string. Alternatively, this could be a regular expression or a list, - dict, or array of regular expressions in which case ``to_replace`` - must be ``None``. + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + ``to_replace`` must be ``None``. method : string, optional, {'pad', 'ffill', 'bfill'} The method to use when for replacement, when ``to_replace`` is a ``list``. @@ -2749,13 +2749,15 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Raises ------ AssertionError - * If ``regex`` is not a ``bool`` and ``to_replace`` is not ``None``. + * If ``regex`` is not a ``bool`` and ``to_replace`` is not + ``None``. TypeError * If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``, ``dict``, ``ndarray``, or ``Series`` - * If ``to_replace`` is ``None`` and ``regex`` is not compilable into a + * If ``to_replace`` is ``None`` and ``regex`` is not compilable + into a regular expression or is a list, dict, ndarray, or Series. 
- * When replacing multiple ``bool`` or ``datetime64`` objects and the + * When replacing multiple ``bool`` or ``datetime64`` objects and the arguments to ``to_replace`` does not match the type of the value being replaced ValueError @@ -2855,8 +2857,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, """ return super(Series, self).replace(to_replace=to_replace, value=value, - inplace=inplace, limit=limit, - regex=regex, method=method, axis=axis) + inplace=inplace, limit=limit, + regex=regex, method=method, + axis=axis) @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): From 66e1dafb7ba156f058a8af46dbf883baadbad700 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Mon, 29 Jan 2018 22:36:33 +0000 Subject: [PATCH 4/5] use _shared_docs --- pandas/core/frame.py | 213 +------------------------------------- pandas/core/generic.py | 229 ++++++++++++++++++++++++++++++++++++++++- pandas/core/series.py | 186 +-------------------------------- 3 files changed, 229 insertions(+), 399 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c5585884a76b6..201d8ba427c8a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -20,7 +20,6 @@ import warnings from textwrap import dedent -import pandas as pd import numpy as np import numpy.ma as ma @@ -43,7 +42,6 @@ is_datetimetz, is_datetime64_any_dtype, is_datetime64tz_dtype, - is_bool, is_bool_dtype, is_integer_dtype, is_float_dtype, @@ -3082,218 +3080,9 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, limit=limit, downcast=downcast, **kwargs) + @Appender(_shared_docs['replace'] % _shared_doc_kwargs) def replace(self, to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad', axis=None): - """ - Replace values given in 'to_replace' with 'value'. 
- - Parameters - ---------- - to_replace : str, regex, list, dict, Series, numeric, or None - - * numeric, str or regex: - - - numeric: numeric values equal to ``to_replace`` will be - replaced with ``value`` - - str: string exactly matching `to_replace` will be replaced - with ``value`` - - regex: regexs matching ``to_replace`` will be replaced with - ``value`` - - * list of str, regex, or numeric: - - - First, if ``to_replace`` and ``value`` are both lists, they - **must** be the same length. - - Second, if ``regex=True`` then all of the strings in **both** - lists will be interpreted as regexs otherwise they will match - directly. This doesn't matter much for ``value`` since there - are only a few possible substitution regexes you can use. - - str and regex rules apply as above. - - * dict: - - - Dicts can be used to specify different replacement values - for different existing values. For example, - {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and - 'y' with 'z'. To use a dict in this way the ``value`` - parameter should be ``None``. - - Alternatively, a dict can specify that different values - should be replaced in different columns. For example, - {'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and - the value 'z' in column 'b' and replaces these values with - whatever is specified in ``value``. The ``value`` parameter - should not be ``None`` in this case. You can treat this as a - special case of passing two lists except that you are - specifying the column to search in. - - Nested dictionaries, e.g., {'a': {'b': np.nan}}, are read as - follows: look in column 'a' for the value 'b' and replace it - with NaN. The ``value`` parameter should be ``None`` to use - a nested dict in this way. You can nest regular expressions - as well. Note that column names (the top-level dictionary - keys in a nested dictionary) **cannot** be regular - expressions. 
- - * None: - - - This means that the ``regex`` argument must be a string, - compiled regular expression, or list, dict, ndarray or Series - of such elements. If ``value`` is also ``None`` then this - **must** be a nested dictionary or ``Series``. - - See the examples section for examples of each of these. - value : scalar, dict, list, str, regex, default None - Value to replace any values matching ``to_replace`` with. - Alternatively, a dict of values specifying which value to use for - each column (columns not in the dict will not be filled). Regular - expressions, strings and lists or dicts of such objects are also - allowed. - inplace : boolean, default False - If True, in place. Note: this will modify any - other views on this object (e.g. a column from a DataFrame). - Returns the caller if this is True. - limit : int, default None - Maximum size gap to forward or backward fill - regex : bool or same types as `to_replace`, default False - Whether to interpret ``to_replace`` and/or ``value`` as regular - expressions. If this is ``True`` then ``to_replace`` *must* be a - string. Alternatively, this could be a regular expression or a - list, dict, or array of regular expressions in which case - ``to_replace`` must be ``None``. - method : string, optional, {'pad', 'ffill', 'bfill'} - The method to use when for replacement, when ``to_replace`` is a - ``list``. - - See Also - -------- - DataFrame.fillna : Fill NA/NaN values - DataFrame.where : Replace values based on boolean condition - - Returns - ------- - filled : DataFrame - - Raises - ------ - AssertionError - * If ``regex`` is not a ``bool`` and ``to_replace`` is not - ``None``. - TypeError - * If ``to_replace`` is a ``dict`` and `value` is not a ``list``, - ``dict``, ``ndarray``, or ``Series`` - * If ``to_replace`` is ``None`` and ``regex`` is not compilable - into a regular expression or is a list, dict, ndarray, or - Series. 
- * When replacing multiple ``bool`` or ``datetime64`` objects and - the arguments to `to_replace` does not match the type of the - value being replaced - ValueError - * If a ``list`` or an ``ndarray`` is passed to `to_replace` and - `value` but they are not the same length. - - Notes - ----- - * Regex substitution is performed under the hood with ``re.sub``. The - rules for substitution for ``re.sub`` are the same. - * Regular expressions will only substitute on strings, meaning you - cannot provide, for example, a regular expression matching floating - point numbers and expect the columns in your frame that have a - numeric dtype to be matched. However, if those floating point - numbers *are* strings, then you can do this. - * This method has *a lot* of options. You are encouraged to experiment - and play with this method to gain intuition about how it works. - - Examples - -------- - - >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], - ... 'B': [5, 6, 7, 8, 9], - ... 'C': ['a', 'b', 'c', 'd', 'e']}) - >>> df.replace(0, 5) - A B C - 0 5 5 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - >>> df.replace([0, 1, 2, 3], 4) - A B C - 0 4 5 a - 1 4 6 b - 2 4 7 c - 3 4 8 d - 4 4 9 e - >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) - A B C - 0 4 5 a - 1 3 6 b - 2 2 7 c - 3 1 8 d - 4 4 9 e - - >>> df.replace({0: 10, 1: 100}) - A B C - 0 10 5 a - 1 100 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - >>> df.replace({'A': 0, 'B': 5}, 100) - A B C - 0 100 100 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - >>> df.replace({'A': {0: 100, 4: 400}}) - A B C - 0 100 5 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 400 9 e - - >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], - ... 
'B': ['abc', 'bar', 'xyz']}) - >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) - A B - 0 new abc - 1 foo new - 2 bait xyz - >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) - A B - 0 new abc - 1 foo bar - 2 bait xyz - >>> df.replace(regex=r'^ba.$', value='new') - A B - 0 new abc - 1 foo new - 2 bait xyz - >>> df.replace(regex={r'^ba.$':'new', 'foo':'xyz'}) - A B - 0 new abc - 1 xyz new - 2 bait xyz - >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') - A B - 0 new abc - 1 new new - 2 bait xyz - - Note that when replacing multiple ``bool`` or ``datetime64`` objects, - the data types in the ``to_replace`` parameter must match the data - type of the value being replaced: - - >>> df = pd.DataFrame({'A': [True, False, True], - ... 'B': [False, True, False]}) - >>> df.replace({'a string': 'new value', True: False}) # raises - TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' - - This raises a ``TypeError`` because one of the ``dict`` keys is not of - the correct type for replacement. - - """ return super(DataFrame, self).replace(to_replace=to_replace, value=value, inplace=inplace, limit=limit, regex=regex, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 82c257c927814..c1a5b393d312b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4791,11 +4791,234 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None): return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) + _shared_docs['replace'] = (""" + Replace values given in 'to_replace' with 'value'. 
+ + Parameters + ---------- + to_replace : str, regex, list, dict, Series, numeric, or None + + * numeric, str or regex: + + - numeric: numeric values equal to ``to_replace`` will be + replaced with ``value`` + - str: string exactly matching ``to_replace`` will be replaced + with ``value`` + - regex: regexs matching ``to_replace`` will be replaced with + ``value`` + + * list of str, regex, or numeric: + + - First, if ``to_replace`` and ``value`` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for ``value`` since there + are only a few possible substitution regexes you can use. + - str and regex rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the ``value`` + parameter should be ``None``. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + {'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and + the value 'z' in column 'b' and replaces these values with + whatever is specified in ``value``. The ``value`` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - For a DataFrame nested dictionaries, e.g., + {'a': {'b': np.nan}}, are read as follows: look in column 'a' + for the value 'b' and replace it with NaN. The ``value`` + parameter should be ``None`` to use a nested dict in this + way. You can nest regular expressions as well. Note that + column names (the top-level dictionary keys in a nested + dictionary) **cannot** be regular expressions. 
+ + * None: + + - This means that the ``regex`` argument must be a string, + compiled regular expression, or list, dict, ndarray or Series + of such elements. If ``value`` is also ``None`` then this + **must** be a nested dictionary or ``Series``. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching ``to_replace`` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + inplace : boolean, default False + If True, in place. Note: this will modify any + other views on this object (e.g. a column from a DataFrame). + Returns the caller if this is True. + limit : int, default None + Maximum size gap to forward or backward fill + regex : bool or same types as ``to_replace``, default False + Whether to interpret ``to_replace`` and/or ``value`` as regular + expressions. If this is ``True`` then ``to_replace`` *must* be a + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + ``to_replace`` must be ``None``. + method : string, optional, {'pad', 'ffill', 'bfill'} + The method to use for replacement, when ``to_replace`` is a + ``list``. + + See Also + -------- + %(klass)s.fillna : Fill NA/NaN values + %(klass)s.where : Replace values based on boolean condition + + Returns + ------- + filled : %(klass)s + + Raises + ------ + AssertionError + * If ``regex`` is not a ``bool`` and ``to_replace`` is not + ``None``. + TypeError + * If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If ``to_replace`` is ``None`` and ``regex`` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series.
+ * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to ``to_replace`` do not match the type of the + value being replaced + ValueError + * If a ``list`` or an ``ndarray`` is passed to ``to_replace`` and + ``value`` but they are not the same length. + + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + + Examples + -------- + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.replace(0, 5) + 0 5 + 1 1 + 2 2 + 3 3 + 4 4 + dtype: int64 + >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 'C': ['a', 'b', 'c', 'd', 'e']}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + >>> s.replace([1, 2], method='bfill') + 0 0 + 1 3 + 2 3 + 3 3 + 4 4 + dtype: int64 + + >>> df.replace({0: 10, 1: 100}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + >>> df.replace({'A': 0, 'B': 5}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + >>> df.replace({'A': {0: 100, 4: 400}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], + ...
'B': ['abc', 'bar', 'xyz']}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + >>> df.replace(regex={r'^ba.$':'new', 'foo':'xyz'}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Note that when replacing multiple ``bool`` or ``datetime64`` objects, + the data types in the ``to_replace`` parameter must match the data + type of the value being replaced: + + >>> df = pd.DataFrame({'A': [True, False, True], + ... 'B': [False, True, False]}) + >>> df.replace({'a string': 'new value', True: False}) # raises + TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' + + This raises a ``TypeError`` because one of the ``dict`` keys is not of + the correct type for replacement. 
+ """) + + @Appender(_shared_docs['replace'] % _shared_doc_kwargs) def replace(self, to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad', axis=None): - """ - Replace values given in 'to_replace' with 'value' - """ inplace = validate_bool_kwarg(inplace, 'inplace') if not is_bool(regex) and to_replace is not None: raise AssertionError("'to_replace' must be 'None' if 'regex' is " diff --git a/pandas/core/series.py b/pandas/core/series.py index 594fd4863ee0d..e4b8979d6393a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -10,7 +10,6 @@ import warnings from textwrap import dedent -import pandas as pd import numpy as np import numpy.ma as ma @@ -27,7 +26,6 @@ is_hashable, is_iterator, is_dict_like, - is_re_compilable, is_scalar, _is_unorderable_exception, _ensure_platform_int, @@ -40,7 +38,6 @@ maybe_cast_to_datetime, maybe_castable, construct_1d_arraylike_from_scalar) from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike -import pandas.core.missing as missing from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) @@ -55,7 +52,7 @@ from pandas import compat from pandas.io.formats.terminal import get_terminal_size from pandas.compat import ( - zip, lzip, u, OrderedDict, StringIO, range, get_range_parameters) + zip, u, OrderedDict, StringIO, range, get_range_parameters) from pandas.compat.numpy import function as nv import pandas.core.ops as ops @@ -2674,188 +2671,9 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, limit=limit, downcast=downcast, **kwargs) + @Appender(generic._shared_docs['replace'] % _shared_doc_kwargs) def replace(self, to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad', axis=None): - """ - Replace values given in 'to_replace' with 'value'. 
- - Parameters - ---------- - to_replace : str, regex, list, dict, Series, numeric, or None - - * numeric, str or regex: - - numeric: numeric values equal to ``to_replace`` will be - replaced with ``value`` - - str: string exactly matching ``to_replace`` will be replaced - with ``value`` - - regex: regexs matching ``to_replace`` will be replaced with - ``value`` - - * list of str, regex, or numeric: - - - First, if ``to_replace`` and ``value`` are both lists, they - **must** be the same length. - - Second, if ``regex=True`` then all of the strings in **both** - lists will be interpreted as regexs otherwise they will match - directly. This doesn't matter much for ``value`` since there - are only a few possible substitution regexes you can use. - - str and regex rules apply as above. - - * dict: - - - Dicts can be used to specify different replacement values - for different existing values. For example, - {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and - 'y' with 'z'. To use a dict in this way the ``value`` - parameter should be ``None``. - - * None: - - - This means that the ``regex`` argument must be a string, - compiled regular expression, or list, dict, ndarray or Series - of such elements. If ``value`` is also ``None`` then this - **must** be a dictionary or ``Series``. - - See the examples section for examples of each of these. - value : scalar, dict, list, str, regex, default None - Value to replace any values matching ``to_replace`` with. Regular - expressions, strings and lists or dicts of such objects are also - allowed. - inplace : boolean, default False - If True, in place. Note: this will modify any - other views on this object (e.g. a column from a DataFrame). - Returns the caller if this is True. - limit : int, default None - Maximum size gap to forward or backward fill - regex : bool or same types as ``to_replace``, default False - Whether to interpret ``to_replace`` and/or ``value`` as regular - expressions. 
If this is ``True`` then ``to_replace`` *must* be a - string. Alternatively, this could be a regular expression or a - list, dict, or array of regular expressions in which case - ``to_replace`` must be ``None``. - method : string, optional, {'pad', 'ffill', 'bfill'} - The method to use when for replacement, when ``to_replace`` is a - ``list``. - - See Also - -------- - Series.fillna : Fill NA/NaN values - Series.where : Replace values based on boolean condition - - Returns - ------- - filled : Series - - Raises - ------ - AssertionError - * If ``regex`` is not a ``bool`` and ``to_replace`` is not - ``None``. - TypeError - * If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``, - ``dict``, ``ndarray``, or ``Series`` - * If ``to_replace`` is ``None`` and ``regex`` is not compilable - into a - regular expression or is a list, dict, ndarray, or Series. - * When replacing multiple ``bool`` or ``datetime64`` objects and - the arguments to ``to_replace`` does not match the type of the - value being replaced - ValueError - * If a ``list`` or an ``ndarray`` is passed to ``to_replace`` and - ``value`` but they are not the same length. - - Notes - ----- - * Regex substitution is performed under the hood with ``re.sub``. The - rules for substitution for ``re.sub`` are the same. - * Regular expressions will only substitute on strings, meaning you - cannot provide, for example, a regular expression matching floating - point numbers and expect the columns in your frame that have a - numeric dtype to be matched. However, if those floating point numbers - *are* strings, then you can do this. - * This method has *a lot* of options. You are encouraged to experiment - and play with this method to gain intuition about how it works. 
- - Examples - -------- - - >>> s = pd.Series([0, 1, 2, 3, 4]) - >>> s.replace(0, 5) - 0 5 - 1 1 - 2 2 - 3 3 - 4 4 - dtype: int64 - - >>> s.replace([0, 1, 2], 300) - 0 300 - 1 300 - 2 300 - 3 3 - 4 4 - dtype: int64 - >>> s.replace([0, 1, 2], [100, 200, 300]) - 0 100 - 1 200 - 2 300 - 3 3 - 4 4 - dtype: int64 - >>> s.replace([1, 2], method='bfill') - 0 0 - 1 3 - 2 3 - 3 3 - 4 4 - dtype: int64 - - >>> s.replace({0: 10, 1: 100}) - 0 10 - 1 100 - 2 2 - 3 3 - 4 4 - dtype: int64 - - >>> s = pd.Series(['bat', 'foo', 'bar', 'bait']) - >>> s.replace(to_replace=r'^ba.$', value='new', regex=True) - 0 new - 1 foo - 2 new - 3 bait - dtype: object - >>> s.replace(regex=r'^ba.$', value='new') - 0 new - 1 foo - 2 new - 3 bait - dtype: object - >>> s.replace(regex={r'^ba.$':'new', 'foo':'xyz'}) - 0 new - 1 xyz - 2 new - 3 bait - dtype: object - >>> s.replace(regex=[r'^ba.$', 'foo'], value='new') - 0 new - 1 new - 2 new - 3 bait - dtype: object - - Note that when replacing multiple ``bool`` or ``datetime64`` objects, - the data types in the ``to_replace`` parameter must match the data - type of the value being replaced: - - >>> s = pd.Series([True, False, True]) - >>> s.replace({'a string': 'new value', True: False}) # raises - TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' - - This raises a ``TypeError`` because one of the ``dict`` keys is not of - the correct type for replacement. 
- - """ return super(Series, self).replace(to_replace=to_replace, value=value, inplace=inplace, limit=limit, regex=regex, method=method, From 008588cecea2342b88ea5aaf7ee70b930dc273ee Mon Sep 17 00:00:00 2001 From: reidy-p Date: Sat, 3 Feb 2018 21:49:52 +0000 Subject: [PATCH 5/5] small typo --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c1a5b393d312b..0f038cd687dfd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4815,7 +4815,7 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None): lists will be interpreted as regexs otherwise they will match directly. This doesn't matter much for ``value`` since there are only a few possible substitution regexes you can use. - - str and regex rules apply as above. + - str, regex and numeric rules apply as above. * dict: