Skip to content

Commit bc1d027

Browse files
reidy-p authored and jreback committed
DOC: Improve replace docstring (pandas-dev#18100)
1 parent 56dbaae commit bc1d027

File tree

3 files changed

+187
-41
lines changed

3 files changed

+187
-41
lines changed

pandas/core/frame.py

+8
Original file line numberDiff line numberDiff line change
@@ -3080,6 +3080,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
30803080
inplace=inplace, limit=limit,
30813081
downcast=downcast, **kwargs)
30823082

3083+
@Appender(_shared_docs['replace'] % _shared_doc_kwargs)
3084+
def replace(self, to_replace=None, value=None, inplace=False, limit=None,
3085+
regex=False, method='pad', axis=None):
3086+
return super(DataFrame, self).replace(to_replace=to_replace,
3087+
value=value, inplace=inplace,
3088+
limit=limit, regex=regex,
3089+
method=method, axis=axis)
3090+
30833091
@Appender(_shared_docs['shift'] % _shared_doc_kwargs)
30843092
def shift(self, periods=1, freq=None, axis=0):
30853093
return super(DataFrame, self).shift(periods=periods, freq=freq,

pandas/core/generic.py

+171-41
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@
6969

7070

7171
def _single_replace(self, to_replace, method, inplace, limit):
72+
"""
73+
Replaces values in a Series using the fill method specified when no
74+
replacement value is given in the replace method
75+
"""
7276
if self.ndim != 1:
7377
raise TypeError('cannot replace {0} with method {1} on a {2}'
7478
.format(to_replace, method, type(self).__name__))
@@ -4787,94 +4791,111 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
47874791
return self.fillna(method='bfill', axis=axis, inplace=inplace,
47884792
limit=limit, downcast=downcast)
47894793

4790-
def replace(self, to_replace=None, value=None, inplace=False, limit=None,
4791-
regex=False, method='pad', axis=None):
4792-
"""
4794+
_shared_docs['replace'] = ("""
47934795
Replace values given in 'to_replace' with 'value'.
47944796
47954797
Parameters
47964798
----------
47974799
to_replace : str, regex, list, dict, Series, numeric, or None
47984800
4799-
* str or regex:
4801+
* numeric, str or regex:
48004802
4801-
- str: string exactly matching `to_replace` will be replaced
4802-
with `value`
4803-
- regex: regexs matching `to_replace` will be replaced with
4804-
`value`
4803+
- numeric: numeric values equal to ``to_replace`` will be
4804+
replaced with ``value``
4805+
- str: string exactly matching ``to_replace`` will be replaced
4806+
with ``value``
4807+
- regex: regexes matching ``to_replace`` will be replaced with
4808+
``value``
48054809
48064810
* list of str, regex, or numeric:
48074811
4808-
- First, if `to_replace` and `value` are both lists, they
4812+
- First, if ``to_replace`` and ``value`` are both lists, they
48094813
**must** be the same length.
48104814
- Second, if ``regex=True`` then all of the strings in **both**
48114815
lists will be interpreted as regexes; otherwise they will match
4812-
directly. This doesn't matter much for `value` since there
4816+
directly. This doesn't matter much for ``value`` since there
48134817
are only a few possible substitution regexes you can use.
4814-
- str and regex rules apply as above.
4818+
- str, regex and numeric rules apply as above.
48154819
48164820
* dict:
48174821
4818-
- Nested dictionaries, e.g., {'a': {'b': nan}}, are read as
4819-
follows: look in column 'a' for the value 'b' and replace it
4820-
with nan. You can nest regular expressions as well. Note that
4822+
- Dicts can be used to specify different replacement values
4823+
for different existing values. For example,
4824+
{'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and
4825+
'y' with 'z'. To use a dict in this way the ``value``
4826+
parameter should be ``None``.
4827+
- For a DataFrame a dict can specify that different values
4828+
should be replaced in different columns. For example,
4829+
{'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and
4830+
the value 'z' in column 'b' and replaces these values with
4831+
whatever is specified in ``value``. The ``value`` parameter
4832+
should not be ``None`` in this case. You can treat this as a
4833+
special case of passing two lists except that you are
4834+
specifying the column to search in.
4835+
- For a DataFrame nested dictionaries, e.g.,
4836+
{'a': {'b': np.nan}}, are read as follows: look in column 'a'
4837+
for the value 'b' and replace it with NaN. The ``value``
4838+
parameter should be ``None`` to use a nested dict in this
4839+
way. You can nest regular expressions as well. Note that
48214840
column names (the top-level dictionary keys in a nested
48224841
dictionary) **cannot** be regular expressions.
4823-
- Keys map to column names and values map to substitution
4824-
values. You can treat this as a special case of passing two
4825-
lists except that you are specifying the column to search in.
48264842
48274843
* None:
48284844
48294845
- This means that the ``regex`` argument must be a string,
48304846
compiled regular expression, or list, dict, ndarray or Series
4831-
of such elements. If `value` is also ``None`` then this
4847+
of such elements. If ``value`` is also ``None`` then this
48324848
**must** be a nested dictionary or ``Series``.
48334849
48344850
See the examples section for examples of each of these.
48354851
value : scalar, dict, list, str, regex, default None
4836-
Value to use to fill holes (e.g. 0), alternately a dict of values
4837-
specifying which value to use for each column (columns not in the
4838-
dict will not be filled). Regular expressions, strings and lists or
4839-
dicts of such objects are also allowed.
4852+
Value to replace any values matching ``to_replace`` with.
4853+
For a DataFrame a dict of values can be used to specify which
4854+
value to use for each column (columns not in the dict will not be
4855+
filled). Regular expressions, strings and lists or dicts of such
4856+
objects are also allowed.
48404857
inplace : boolean, default False
48414858
If True, replace in place. Note: this will modify any
48424859
other views on this object (e.g. a column from a DataFrame).
48434860
Returns the caller if this is True.
48444861
limit : int, default None
48454862
Maximum size gap to forward or backward fill
4846-
regex : bool or same types as `to_replace`, default False
4847-
Whether to interpret `to_replace` and/or `value` as regular
4848-
expressions. If this is ``True`` then `to_replace` *must* be a
4849-
string. Otherwise, `to_replace` must be ``None`` because this
4850-
parameter will be interpreted as a regular expression or a list,
4851-
dict, or array of regular expressions.
4863+
regex : bool or same types as ``to_replace``, default False
4864+
Whether to interpret ``to_replace`` and/or ``value`` as regular
4865+
expressions. If this is ``True`` then ``to_replace`` *must* be a
4866+
string. Alternatively, this could be a regular expression or a
4867+
list, dict, or array of regular expressions in which case
4868+
``to_replace`` must be ``None``.
48524869
method : string, optional, {'pad', 'ffill', 'bfill'}
48534870
The method to use for replacement, when ``to_replace`` is a
48544871
``list``.
48554872
48564873
See Also
48574874
--------
4858-
NDFrame.reindex
4859-
NDFrame.asfreq
4860-
NDFrame.fillna
4875+
%(klass)s.fillna : Fill NA/NaN values
4876+
%(klass)s.where : Replace values based on boolean condition
48614877
48624878
Returns
48634879
-------
4864-
filled : NDFrame
4880+
filled : %(klass)s
48654881
48664882
Raises
48674883
------
48684884
AssertionError
4869-
* If `regex` is not a ``bool`` and `to_replace` is not ``None``.
4885+
* If ``regex`` is not a ``bool`` and ``to_replace`` is not
4886+
``None``.
48704887
TypeError
4871-
* If `to_replace` is a ``dict`` and `value` is not a ``list``,
4888+
* If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``,
48724889
``dict``, ``ndarray``, or ``Series``
4873-
* If `to_replace` is ``None`` and `regex` is not compilable into a
4874-
regular expression or is a list, dict, ndarray, or Series.
4890+
* If ``to_replace`` is ``None`` and ``regex`` is not compilable
4891+
into a regular expression or is a list, dict, ndarray, or
4892+
Series.
4893+
* When replacing multiple ``bool`` or ``datetime64`` objects and
4894+
the arguments to ``to_replace`` do not match the type of the
4895+
value being replaced
48754896
ValueError
4876-
* If `to_replace` and `value` are ``list`` s or ``ndarray`` s, but
4877-
they are not the same length.
4897+
* If a ``list`` or an ``ndarray`` is passed to ``to_replace`` and
4898+
``value`` but they are not the same length.
48784899
48794900
Notes
48804901
-----
@@ -4883,12 +4904,121 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
48834904
* Regular expressions will only substitute on strings, meaning you
48844905
cannot provide, for example, a regular expression matching floating
48854906
point numbers and expect the columns in your frame that have a
4886-
numeric dtype to be matched. However, if those floating point numbers
4887-
*are* strings, then you can do this.
4907+
numeric dtype to be matched. However, if those floating point
4908+
numbers *are* strings, then you can do this.
48884909
* This method has *a lot* of options. You are encouraged to experiment
48894910
and play with this method to gain intuition about how it works.
48904911
4891-
"""
4912+
Examples
4913+
--------
4914+
4915+
>>> s = pd.Series([0, 1, 2, 3, 4])
4916+
>>> s.replace(0, 5)
4917+
0 5
4918+
1 1
4919+
2 2
4920+
3 3
4921+
4 4
4922+
dtype: int64
4923+
>>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
4924+
... 'B': [5, 6, 7, 8, 9],
4925+
... 'C': ['a', 'b', 'c', 'd', 'e']})
4926+
>>> df.replace(0, 5)
4927+
A B C
4928+
0 5 5 a
4929+
1 1 6 b
4930+
2 2 7 c
4931+
3 3 8 d
4932+
4 4 9 e
4933+
4934+
>>> df.replace([0, 1, 2, 3], 4)
4935+
A B C
4936+
0 4 5 a
4937+
1 4 6 b
4938+
2 4 7 c
4939+
3 4 8 d
4940+
4 4 9 e
4941+
>>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
4942+
A B C
4943+
0 4 5 a
4944+
1 3 6 b
4945+
2 2 7 c
4946+
3 1 8 d
4947+
4 4 9 e
4948+
>>> s.replace([1, 2], method='bfill')
4949+
0 0
4950+
1 3
4951+
2 3
4952+
3 3
4953+
4 4
4954+
dtype: int64
4955+
4956+
>>> df.replace({0: 10, 1: 100})
4957+
A B C
4958+
0 10 5 a
4959+
1 100 6 b
4960+
2 2 7 c
4961+
3 3 8 d
4962+
4 4 9 e
4963+
>>> df.replace({'A': 0, 'B': 5}, 100)
4964+
A B C
4965+
0 100 100 a
4966+
1 1 6 b
4967+
2 2 7 c
4968+
3 3 8 d
4969+
4 4 9 e
4970+
>>> df.replace({'A': {0: 100, 4: 400}})
4971+
A B C
4972+
0 100 5 a
4973+
1 1 6 b
4974+
2 2 7 c
4975+
3 3 8 d
4976+
4 400 9 e
4977+
4978+
>>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
4979+
... 'B': ['abc', 'bar', 'xyz']})
4980+
>>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
4981+
A B
4982+
0 new abc
4983+
1 foo new
4984+
2 bait xyz
4985+
>>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
4986+
A B
4987+
0 new abc
4988+
1 foo bar
4989+
2 bait xyz
4990+
>>> df.replace(regex=r'^ba.$', value='new')
4991+
A B
4992+
0 new abc
4993+
1 foo new
4994+
2 bait xyz
4995+
>>> df.replace(regex={r'^ba.$':'new', 'foo':'xyz'})
4996+
A B
4997+
0 new abc
4998+
1 xyz new
4999+
2 bait xyz
5000+
>>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
5001+
A B
5002+
0 new abc
5003+
1 new new
5004+
2 bait xyz
5005+
5006+
Note that when replacing multiple ``bool`` or ``datetime64`` objects,
5007+
the data types in the ``to_replace`` parameter must match the data
5008+
type of the value being replaced:
5009+
5010+
>>> df = pd.DataFrame({'A': [True, False, True],
5011+
... 'B': [False, True, False]})
5012+
>>> df.replace({'a string': 'new value', True: False}) # raises
5013+
TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
5014+
5015+
This raises a ``TypeError`` because one of the ``dict`` keys is not of
5016+
the correct type for replacement.
5017+
""")
5018+
5019+
@Appender(_shared_docs['replace'] % _shared_doc_kwargs)
5020+
def replace(self, to_replace=None, value=None, inplace=False, limit=None,
5021+
regex=False, method='pad', axis=None):
48925022
inplace = validate_bool_kwarg(inplace, 'inplace')
48935023
if not is_bool(regex) and to_replace is not None:
48945024
raise AssertionError("'to_replace' must be 'None' if 'regex' is "

pandas/core/series.py

+8
Original file line numberDiff line numberDiff line change
@@ -2671,6 +2671,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
26712671
limit=limit, downcast=downcast,
26722672
**kwargs)
26732673

2674+
@Appender(generic._shared_docs['replace'] % _shared_doc_kwargs)
2675+
def replace(self, to_replace=None, value=None, inplace=False, limit=None,
2676+
regex=False, method='pad', axis=None):
2677+
return super(Series, self).replace(to_replace=to_replace, value=value,
2678+
inplace=inplace, limit=limit,
2679+
regex=regex, method=method,
2680+
axis=axis)
2681+
26742682
@Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
26752683
def shift(self, periods=1, freq=None, axis=0):
26762684
return super(Series, self).shift(periods=periods, freq=freq, axis=axis)

0 commit comments

Comments
 (0)