69
69
70
70
71
71
def _single_replace (self , to_replace , method , inplace , limit ):
72
+ """
73
+ Replaces values in a Series using the fill method specified when no
74
+ replacement value is given in the replace method
75
+ """
72
76
if self .ndim != 1 :
73
77
raise TypeError ('cannot replace {0} with method {1} on a {2}'
74
78
.format (to_replace , method , type (self ).__name__ ))
@@ -4787,94 +4791,111 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
4787
4791
return self .fillna (method = 'bfill' , axis = axis , inplace = inplace ,
4788
4792
limit = limit , downcast = downcast )
4789
4793
4790
- def replace (self , to_replace = None , value = None , inplace = False , limit = None ,
4791
- regex = False , method = 'pad' , axis = None ):
4792
- """
4794
+ _shared_docs ['replace' ] = ("""
4793
4795
Replace values given in 'to_replace' with 'value'.
4794
4796
4795
4797
Parameters
4796
4798
----------
4797
4799
to_replace : str, regex, list, dict, Series, numeric, or None
4798
4800
4799
- * str or regex:
4801
+ * numeric, str or regex:
4800
4802
4801
- - str: string exactly matching `to_replace` will be replaced
4802
- with `value`
4803
- - regex: regexs matching `to_replace` will be replaced with
4804
- `value`
4803
+ - numeric: numeric values equal to ``to_replace`` will be
4804
+ replaced with ``value``
4805
+ - str: string exactly matching ``to_replace`` will be replaced
4806
+ with ``value``
4807
+ - regex: regexes matching ``to_replace`` will be replaced with
4808
+ ``value``
4805
4809
4806
4810
* list of str, regex, or numeric:
4807
4811
4808
- - First, if `to_replace` and `value` are both lists, they
4812
+ - First, if ``to_replace`` and ``value`` are both lists, they
4809
4813
**must** be the same length.
4810
4814
- Second, if ``regex=True`` then all of the strings in **both**
4811
4815
lists will be interpreted as regexes; otherwise they will match
4812
- directly. This doesn't matter much for `value` since there
4816
+ directly. This doesn't matter much for ``value`` since there
4813
4817
are only a few possible substitution regexes you can use.
4814
- - str and regex rules apply as above.
4818
+ - str, regex and numeric rules apply as above.
4815
4819
4816
4820
* dict:
4817
4821
4818
- - Nested dictionaries, e.g., {'a': {'b': nan}}, are read as
4819
- follows: look in column 'a' for the value 'b' and replace it
4820
- with nan. You can nest regular expressions as well. Note that
4822
+ - Dicts can be used to specify different replacement values
4823
+ for different existing values. For example,
4824
+ {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and
4825
+ 'y' with 'z'. To use a dict in this way the ``value``
4826
+ parameter should be ``None``.
4827
+ - For a DataFrame a dict can specify that different values
4828
+ should be replaced in different columns. For example,
4829
+ {'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and
4830
+ the value 'z' in column 'b' and replaces these values with
4831
+ whatever is specified in ``value``. The ``value`` parameter
4832
+ should not be ``None`` in this case. You can treat this as a
4833
+ special case of passing two lists except that you are
4834
+ specifying the column to search in.
4835
+ - For a DataFrame nested dictionaries, e.g.,
4836
+ {'a': {'b': np.nan}}, are read as follows: look in column 'a'
4837
+ for the value 'b' and replace it with NaN. The ``value``
4838
+ parameter should be ``None`` to use a nested dict in this
4839
+ way. You can nest regular expressions as well. Note that
4821
4840
column names (the top-level dictionary keys in a nested
4822
4841
dictionary) **cannot** be regular expressions.
4823
- - Keys map to column names and values map to substitution
4824
- values. You can treat this as a special case of passing two
4825
- lists except that you are specifying the column to search in.
4826
4842
4827
4843
* None:
4828
4844
4829
4845
- This means that the ``regex`` argument must be a string,
4830
4846
compiled regular expression, or list, dict, ndarray or Series
4831
- of such elements. If `value` is also ``None`` then this
4847
+ of such elements. If ``value`` is also ``None`` then this
4832
4848
**must** be a nested dictionary or ``Series``.
4833
4849
4834
4850
See the examples section for examples of each of these.
4835
4851
value : scalar, dict, list, str, regex, default None
4836
- Value to use to fill holes (e.g. 0), alternately a dict of values
4837
- specifying which value to use for each column (columns not in the
4838
- dict will not be filled). Regular expressions, strings and lists or
4839
- dicts of such objects are also allowed.
4852
+ Value to replace any values matching ``to_replace`` with.
4853
+ For a DataFrame a dict of values can be used to specify which
4854
+ value to use for each column (columns not in the dict will not be
4855
+ filled). Regular expressions, strings and lists or dicts of such
4856
+ objects are also allowed.
4840
4857
inplace : boolean, default False
4841
4858
If True, in place. Note: this will modify any
4842
4859
other views on this object (e.g. a column from a DataFrame).
4843
4860
Returns the caller if this is True.
4844
4861
limit : int, default None
4845
4862
Maximum size gap to forward or backward fill
4846
- regex : bool or same types as `to_replace`, default False
4847
- Whether to interpret `to_replace` and/or `value` as regular
4848
- expressions. If this is ``True`` then `to_replace` *must* be a
4849
- string. Otherwise, `to_replace` must be ``None`` because this
4850
- parameter will be interpreted as a regular expression or a list,
4851
- dict, or array of regular expressions .
4863
+ regex : bool or same types as ``to_replace``, default False
4864
+ Whether to interpret ``to_replace`` and/or ``value`` as regular
4865
+ expressions. If this is ``True`` then ``to_replace`` *must* be a
4866
+ string. Alternatively, this could be a regular expression or a
4867
+ list, dict, or array of regular expressions in which case
4868
+ ``to_replace`` must be ``None``.
4852
4869
method : string, optional, {'pad', 'ffill', 'bfill'}
4853
4870
The method to use for replacement, when ``to_replace`` is a
4854
4871
``list``.
4855
4872
4856
4873
See Also
4857
4874
--------
4858
- NDFrame.reindex
4859
- NDFrame.asfreq
4860
- NDFrame.fillna
4875
+ %(klass)s.fillna : Fill NA/NaN values
4876
+ %(klass)s.where : Replace values based on boolean condition
4861
4877
4862
4878
Returns
4863
4879
-------
4864
- filled : NDFrame
4880
+ filled : %(klass)s
4865
4881
4866
4882
Raises
4867
4883
------
4868
4884
AssertionError
4869
- * If `regex` is not a ``bool`` and `to_replace` is not ``None``.
4885
+ * If ``regex`` is not a ``bool`` and ``to_replace`` is not
4886
+ ``None``.
4870
4887
TypeError
4871
- * If `to_replace` is a ``dict`` and `value` is not a ``list``,
4888
+ * If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``,
4872
4889
``dict``, ``ndarray``, or ``Series``
4873
- * If `to_replace` is ``None`` and `regex` is not compilable into a
4874
- regular expression or is a list, dict, ndarray, or Series.
4890
+ * If ``to_replace`` is ``None`` and ``regex`` is not compilable
4891
+ into a regular expression or is a list, dict, ndarray, or
4892
+ Series.
4893
+ * When replacing multiple ``bool`` or ``datetime64`` objects and
4894
+ the arguments to ``to_replace`` do not match the type of the
4895
+ value being replaced
4875
4896
ValueError
4876
- * If `to_replace` and `value` are ``list `` s or ``ndarray `` s, but
4877
- they are not the same length.
4897
+ * If a ``list`` or an ``ndarray`` is passed to ``to_replace`` and
4898
+ ``value`` but they are not the same length.
4878
4899
4879
4900
Notes
4880
4901
-----
@@ -4883,12 +4904,121 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
4883
4904
* Regular expressions will only substitute on strings, meaning you
4884
4905
cannot provide, for example, a regular expression matching floating
4885
4906
point numbers and expect the columns in your frame that have a
4886
- numeric dtype to be matched. However, if those floating point numbers
4887
- *are* strings, then you can do this.
4907
+ numeric dtype to be matched. However, if those floating point
4908
+ numbers *are* strings, then you can do this.
4888
4909
* This method has *a lot* of options. You are encouraged to experiment
4889
4910
and play with this method to gain intuition about how it works.
4890
4911
4891
- """
4912
+ Examples
4913
+ --------
4914
+
4915
+ >>> s = pd.Series([0, 1, 2, 3, 4])
4916
+ >>> s.replace(0, 5)
4917
+ 0 5
4918
+ 1 1
4919
+ 2 2
4920
+ 3 3
4921
+ 4 4
4922
+ dtype: int64
4923
+ >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
4924
+ ... 'B': [5, 6, 7, 8, 9],
4925
+ ... 'C': ['a', 'b', 'c', 'd', 'e']})
4926
+ >>> df.replace(0, 5)
4927
+ A B C
4928
+ 0 5 5 a
4929
+ 1 1 6 b
4930
+ 2 2 7 c
4931
+ 3 3 8 d
4932
+ 4 4 9 e
4933
+
4934
+ >>> df.replace([0, 1, 2, 3], 4)
4935
+ A B C
4936
+ 0 4 5 a
4937
+ 1 4 6 b
4938
+ 2 4 7 c
4939
+ 3 4 8 d
4940
+ 4 4 9 e
4941
+ >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
4942
+ A B C
4943
+ 0 4 5 a
4944
+ 1 3 6 b
4945
+ 2 2 7 c
4946
+ 3 1 8 d
4947
+ 4 4 9 e
4948
+ >>> s.replace([1, 2], method='bfill')
4949
+ 0 0
4950
+ 1 3
4951
+ 2 3
4952
+ 3 3
4953
+ 4 4
4954
+ dtype: int64
4955
+
4956
+ >>> df.replace({0: 10, 1: 100})
4957
+ A B C
4958
+ 0 10 5 a
4959
+ 1 100 6 b
4960
+ 2 2 7 c
4961
+ 3 3 8 d
4962
+ 4 4 9 e
4963
+ >>> df.replace({'A': 0, 'B': 5}, 100)
4964
+ A B C
4965
+ 0 100 100 a
4966
+ 1 1 6 b
4967
+ 2 2 7 c
4968
+ 3 3 8 d
4969
+ 4 4 9 e
4970
+ >>> df.replace({'A': {0: 100, 4: 400}})
4971
+ A B C
4972
+ 0 100 5 a
4973
+ 1 1 6 b
4974
+ 2 2 7 c
4975
+ 3 3 8 d
4976
+ 4 400 9 e
4977
+
4978
+ >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
4979
+ ... 'B': ['abc', 'bar', 'xyz']})
4980
+ >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
4981
+ A B
4982
+ 0 new abc
4983
+ 1 foo new
4984
+ 2 bait xyz
4985
+ >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
4986
+ A B
4987
+ 0 new abc
4988
+ 1 foo bar
4989
+ 2 bait xyz
4990
+ >>> df.replace(regex=r'^ba.$', value='new')
4991
+ A B
4992
+ 0 new abc
4993
+ 1 foo new
4994
+ 2 bait xyz
4995
+ >>> df.replace(regex={r'^ba.$':'new', 'foo':'xyz'})
4996
+ A B
4997
+ 0 new abc
4998
+ 1 xyz new
4999
+ 2 bait xyz
5000
+ >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
5001
+ A B
5002
+ 0 new abc
5003
+ 1 new new
5004
+ 2 bait xyz
5005
+
5006
+ Note that when replacing multiple ``bool`` or ``datetime64`` objects,
5007
+ the data types in the ``to_replace`` parameter must match the data
5008
+ type of the value being replaced:
5009
+
5010
+ >>> df = pd.DataFrame({'A': [True, False, True],
5011
+ ... 'B': [False, True, False]})
5012
+ >>> df.replace({'a string': 'new value', True: False}) # raises
5013
+ TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
5014
+
5015
+ This raises a ``TypeError`` because one of the ``dict`` keys is not of
5016
+ the correct type for replacement.
5017
+ """ )
5018
+
5019
+ @Appender (_shared_docs ['replace' ] % _shared_doc_kwargs )
5020
+ def replace (self , to_replace = None , value = None , inplace = False , limit = None ,
5021
+ regex = False , method = 'pad' , axis = None ):
4892
5022
inplace = validate_bool_kwarg (inplace , 'inplace' )
4893
5023
if not is_bool (regex ) and to_replace is not None :
4894
5024
raise AssertionError ("'to_replace' must be 'None' if 'regex' is "
0 commit comments