-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: inconsistency between replace dict using integers and using strings (#20656) #21477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d8f2d70
3afd287
2bafaaa
f76b2e2
6f836b4
dd916d2
7b1624b
4729fc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1689,6 +1689,44 @@ def _nanpercentile(values, q, axis, **kw): | |
placement=np.arange(len(result)), | ||
ndim=ndim) | ||
|
||
def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True, | ||
convert=False, regex=False, mgr=None): | ||
""" | ||
Replace value corresponding to the given boolean array with another | ||
value. | ||
|
||
Parameters | ||
---------- | ||
mask : array_like of bool | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add optional to all of these args There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you reorder these args to match as much as possible _replace_single |
||
The mask of values to replace. | ||
src : object | ||
The value to replace. It is ignored if regex is False. | ||
dst : object | ||
The value to be replaced with. | ||
convert : bool | ||
If true, try to coerce any object types to better types. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add the inplace arg |
||
regex : bool | ||
If true, search for element matching with the pattern in src. | ||
Masked element is ignored. | ||
mgr : BlockPlacement, optional | ||
|
||
Returns | ||
------- | ||
A new block if there is anything to replace or the original block. | ||
""" | ||
|
||
if mask.any(): | ||
if not regex: | ||
self = self.coerce_to_target_dtype(dst) | ||
return self.putmask(mask, dst, inplace=inplace) | ||
else: | ||
return self._replace_single(src, dst, inplace=inplace, | ||
regex=regex, | ||
convert=convert, | ||
mask=mask, | ||
mgr=mgr) | ||
return self | ||
|
||
|
||
class ScalarBlock(Block): | ||
""" | ||
|
@@ -2464,7 +2502,7 @@ def replace(self, to_replace, value, inplace=False, filter=None, | |
regex=regex, mgr=mgr) | ||
|
||
def _replace_single(self, to_replace, value, inplace=False, filter=None, | ||
regex=False, convert=True, mgr=None): | ||
regex=False, convert=True, mgr=None, mask=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string here |
||
|
||
inplace = validate_bool_kwarg(inplace, 'inplace') | ||
|
||
|
@@ -2531,15 +2569,56 @@ def re_replacer(s): | |
else: | ||
filt = self.mgr_locs.isin(filter).nonzero()[0] | ||
|
||
new_values[filt] = f(new_values[filt]) | ||
if mask is None: | ||
new_values[filt] = f(new_values[filt]) | ||
else: | ||
new_values[filt][mask] = f(new_values[filt][mask]) | ||
|
||
# convert | ||
block = self.make_block(new_values) | ||
if convert: | ||
block = block.convert(by_item=True, numeric=False) | ||
|
||
return block | ||
|
||
def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True, | ||
convert=False, regex=False, mgr=None): | ||
""" | ||
Replace value corresponding to the given boolean array with another | ||
value. | ||
|
||
Parameters | ||
---------- | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above (you can make a shared doc-string if you want here) |
||
mask : array_like of bool | ||
The mask of values to replace. | ||
src : object | ||
The value to replace. It is ignored if regex is False. | ||
dst : object | ||
The value to be replaced with. | ||
convert : bool | ||
If true, try to coerce any object types to better types. | ||
regex : bool | ||
If true, search for element matching with the pattern in src. | ||
Masked element is ignored. | ||
mgr : BlockPlacement, optional | ||
|
||
Returns | ||
------- | ||
A new block if there is anything to replace or the original block. | ||
""" | ||
if mask.any(): | ||
block = super(ObjectBlock, self)._replace_coerce(mask=mask, | ||
src=src, | ||
dst=dst, | ||
inplace=inplace, | ||
convert=convert, | ||
regex=regex, | ||
mgr=mgr) | ||
if convert: | ||
block = [b.convert(by_item=True, numeric=False, copy=True) | ||
for b in block] | ||
return block | ||
return self | ||
|
||
|
||
class CategoricalBlock(ExtensionBlock): | ||
__slots__ = () | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
from functools import partial | ||
import itertools | ||
import operator | ||
import re | ||
|
||
import numpy as np | ||
|
||
|
@@ -19,11 +20,13 @@ | |
is_datetimelike_v_numeric, | ||
is_numeric_v_string_like, is_extension_type, | ||
is_extension_array_dtype, | ||
is_scalar) | ||
is_scalar, | ||
is_re_compilable) | ||
from pandas.core.dtypes.cast import ( | ||
maybe_promote, | ||
infer_dtype_from_scalar, | ||
find_common_type) | ||
find_common_type, | ||
maybe_convert_objects) | ||
from pandas.core.dtypes.missing import isna | ||
import pandas.core.dtypes.concat as _concat | ||
from pandas.core.dtypes.generic import ABCSeries, ABCExtensionArray | ||
|
@@ -571,12 +574,17 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False, | |
# figure out our mask a-priori to avoid repeated replacements | ||
values = self.as_array() | ||
|
||
def comp(s): | ||
def comp(s, reg=False): | ||
if isna(s): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string, rename reg -> regex |
||
return isna(values) | ||
return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) | ||
if hasattr(s, 'asm8'): | ||
return _maybe_compare(maybe_convert_objects(values), | ||
getattr(s, 'asm8'), reg) | ||
if reg and is_re_compilable(s): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these are the same? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My mistake. At first, I wanted to raise a ValueError when regex is True but replacer is not reg compilable. It might not be a good idea to do it here. I will delete the if condition. |
||
return _maybe_compare(values, s, reg) | ||
return _maybe_compare(values, s, reg) | ||
|
||
masks = [comp(s) for i, s in enumerate(src_list)] | ||
masks = [comp(s, regex) for i, s in enumerate(src_list)] | ||
|
||
result_blocks = [] | ||
src_len = len(src_list) - 1 | ||
|
@@ -588,20 +596,16 @@ def comp(s): | |
for i, (s, d) in enumerate(zip(src_list, dest_list)): | ||
new_rb = [] | ||
for b in rb: | ||
if b.dtype == np.object_: | ||
convert = i == src_len | ||
result = b.replace(s, d, inplace=inplace, regex=regex, | ||
mgr=mgr, convert=convert) | ||
m = masks[i][b.mgr_locs.indexer] | ||
convert = i == src_len | ||
result = b._replace_coerce(mask=m, src=s, dst=d, | ||
inplace=inplace, | ||
convert=convert, regex=regex, | ||
mgr=mgr) | ||
if m.any(): | ||
new_rb = _extend_blocks(result, new_rb) | ||
else: | ||
# get our mask for this element, sized to this | ||
# particular block | ||
m = masks[i][b.mgr_locs.indexer] | ||
if m.any(): | ||
b = b.coerce_to_target_dtype(d) | ||
new_rb.extend(b.putmask(m, d, inplace=True)) | ||
else: | ||
new_rb.append(b) | ||
new_rb.append(b) | ||
rb = new_rb | ||
result_blocks.extend(rb) | ||
|
||
|
@@ -1890,7 +1894,12 @@ def _consolidate(blocks): | |
return new_blocks | ||
|
||
|
||
def _maybe_compare(a, b, op): | ||
def _maybe_compare(a, b, regex=False): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you make this slightly more verbose name, can you move to pandas/core/ops.py (cc @jbrockmendel good location)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we anticipate using it elsewhere? If not I'd leave it here at least for now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. like name it |
||
if not regex: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string here |
||
op = lambda x: operator.eq(x, b) | ||
else: | ||
op = np.vectorize(lambda x: bool(re.match(b, x)) if isinstance(x, str) | ||
else False) | ||
|
||
is_a_array = isinstance(a, np.ndarray) | ||
is_b_array = isinstance(b, np.ndarray) | ||
|
@@ -1902,9 +1911,8 @@ def _maybe_compare(a, b, op): | |
# numpy deprecation warning if comparing numeric vs string-like | ||
elif is_numeric_v_string_like(a, b): | ||
result = False | ||
|
||
else: | ||
result = op(a, b) | ||
result = op(a) | ||
|
||
if is_scalar(result) and (is_a_array or is_b_array): | ||
type_names = [type(a).__name__, type(b).__name__] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -243,6 +243,13 @@ def test_replace_string_with_number(self): | |
expected = pd.Series([1, 2, 3]) | ||
tm.assert_series_equal(expected, result) | ||
|
||
def test_repace_intertwined_key_value_dict(self): | ||
# GH 20656 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a 1-liner explaining the test in a bit more detail There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo repace --> replace (?) |
||
s = pd.Series(['a', 'b']) | ||
expected = pd.Series(['b', 'a']) | ||
result = s.replace({'a': 'b', 'b': 'a'}) | ||
tm.assert_series_equal(expected, result) | ||
|
||
def test_replace_unicode_with_number(self): | ||
# GH 15743 | ||
s = pd.Series([1, 2, 3]) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move to reshaping