Skip to content

Commit d537d77

Browse files
authored
REF: replace column-wise, remove BlockManager.apply filter kwarg (#33340)
1 parent c532f62 commit d537d77

File tree

5 files changed

+85
-113
lines changed

5 files changed

+85
-113
lines changed

pandas/core/frame.py

+35
Original file line numberDiff line numberDiff line change
@@ -4025,6 +4025,41 @@ def replace(
40254025
method=method,
40264026
)
40274027

4028+
def _replace_columnwise(
4029+
self, mapping: Dict[Label, Tuple[Any, Any]], inplace: bool, regex
4030+
):
4031+
"""
4032+
Dispatch to Series.replace column-wise.
4033+
4034+
4035+
Parameters
4036+
----------
4037+
mapping : dict
4038+
of the form {col: (target, value)}
4039+
inplace : bool
4040+
regex : bool or same types as `to_replace` in DataFrame.replace
4041+
4042+
Returns
4043+
-------
4044+
DataFrame or None
4045+
"""
4046+
# Operate column-wise
4047+
res = self if inplace else self.copy()
4048+
ax = self.columns
4049+
4050+
for i in range(len(ax)):
4051+
if ax[i] in mapping:
4052+
ser = self.iloc[:, i]
4053+
4054+
target, value = mapping[ax[i]]
4055+
newobj = ser.replace(target, value, regex=regex)
4056+
4057+
res.iloc[:, i] = newobj
4058+
4059+
if inplace:
4060+
return
4061+
return res.__finalize__(self)
4062+
40284063
@Appender(_shared_docs["shift"] % _shared_doc_kwargs)
40294064
def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "DataFrame":
40304065
return super().shift(

pandas/core/generic.py

+17-22
Original file line numberDiff line numberDiff line change
@@ -6560,18 +6560,16 @@ def replace(
65606560

65616561
# {'A': NA} -> 0
65626562
elif not is_list_like(value):
6563-
keys = [(k, src) for k, src in to_replace.items() if k in self]
6564-
keys_len = len(keys) - 1
6565-
for i, (k, src) in enumerate(keys):
6566-
convert = i == keys_len
6567-
new_data = new_data.replace(
6568-
to_replace=src,
6569-
value=value,
6570-
filter=[k],
6571-
inplace=inplace,
6572-
regex=regex,
6573-
convert=convert,
6563+
# Operate column-wise
6564+
if self.ndim == 1:
6565+
raise ValueError(
6566+
"Series.replace cannot use dict-like to_replace "
6567+
"and non-None value"
65746568
)
6569+
mapping = {
6570+
col: (to_rep, value) for col, to_rep in to_replace.items()
6571+
}
6572+
return self._replace_columnwise(mapping, inplace, regex)
65756573
else:
65766574
raise TypeError("value argument must be scalar, dict, or Series")
65776575

@@ -6612,17 +6610,14 @@ def replace(
66126610

66136611
# dest iterable dict-like
66146612
if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1}
6615-
new_data = self._mgr
6616-
6617-
for k, v in value.items():
6618-
if k in self:
6619-
new_data = new_data.replace(
6620-
to_replace=to_replace,
6621-
value=v,
6622-
filter=[k],
6623-
inplace=inplace,
6624-
regex=regex,
6625-
)
6613+
# Operate column-wise
6614+
if self.ndim == 1:
6615+
raise ValueError(
6616+
"Series.replace cannot use dict-value and "
6617+
"non-None to_replace"
6618+
)
6619+
mapping = {col: (to_replace, val) for col, val in value.items()}
6620+
return self._replace_columnwise(mapping, inplace, regex)
66266621

66276622
elif not is_list_like(value): # NA -> 0
66286623
new_data = self._mgr.replace(

pandas/core/internals/blocks.py

+13-74
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,6 @@ def replace(
678678
to_replace,
679679
value,
680680
inplace: bool = False,
681-
filter=None,
682681
regex: bool = False,
683682
convert: bool = True,
684683
):
@@ -710,12 +709,7 @@ def replace(
710709
# _can_hold_element checks have reduced this back to the
711710
# scalar case and we can avoid a costly object cast
712711
return self.replace(
713-
to_replace[0],
714-
value,
715-
inplace=inplace,
716-
filter=filter,
717-
regex=regex,
718-
convert=convert,
712+
to_replace[0], value, inplace=inplace, regex=regex, convert=convert,
719713
)
720714

721715
# GH 22083, TypeError or ValueError occurred within error handling
@@ -729,7 +723,6 @@ def replace(
729723
to_replace=to_replace,
730724
value=value,
731725
inplace=inplace,
732-
filter=filter,
733726
regex=regex,
734727
convert=convert,
735728
)
@@ -742,9 +735,6 @@ def replace(
742735
to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype)
743736

744737
mask = missing.mask_missing(values, to_replace)
745-
if filter is not None:
746-
filtered_out = ~self.mgr_locs.isin(filter)
747-
mask[filtered_out.nonzero()[0]] = False
748738

749739
if not mask.any():
750740
if inplace:
@@ -773,7 +763,6 @@ def replace(
773763
to_replace=original_to_replace,
774764
value=value,
775765
inplace=inplace,
776-
filter=filter,
777766
regex=regex,
778767
convert=convert,
779768
)
@@ -2373,20 +2362,13 @@ def _can_hold_element(self, element: Any) -> bool:
23732362
return issubclass(tipo.type, np.bool_)
23742363
return isinstance(element, (bool, np.bool_))
23752364

2376-
def replace(
2377-
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
2378-
):
2365+
def replace(self, to_replace, value, inplace=False, regex=False, convert=True):
23792366
inplace = validate_bool_kwarg(inplace, "inplace")
23802367
to_replace_values = np.atleast_1d(to_replace)
23812368
if not np.can_cast(to_replace_values, bool):
23822369
return self
23832370
return super().replace(
2384-
to_replace,
2385-
value,
2386-
inplace=inplace,
2387-
filter=filter,
2388-
regex=regex,
2389-
convert=convert,
2371+
to_replace, value, inplace=inplace, regex=regex, convert=convert,
23902372
)
23912373

23922374

@@ -2460,9 +2442,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
24602442
def _can_hold_element(self, element: Any) -> bool:
24612443
return True
24622444

2463-
def replace(
2464-
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
2465-
):
2445+
def replace(self, to_replace, value, inplace=False, regex=False, convert=True):
24662446
to_rep_is_list = is_list_like(to_replace)
24672447
value_is_list = is_list_like(value)
24682448
both_lists = to_rep_is_list and value_is_list
@@ -2473,33 +2453,18 @@ def replace(
24732453

24742454
if not either_list and is_re(to_replace):
24752455
return self._replace_single(
2476-
to_replace,
2477-
value,
2478-
inplace=inplace,
2479-
filter=filter,
2480-
regex=True,
2481-
convert=convert,
2456+
to_replace, value, inplace=inplace, regex=True, convert=convert,
24822457
)
24832458
elif not (either_list or regex):
24842459
return super().replace(
2485-
to_replace,
2486-
value,
2487-
inplace=inplace,
2488-
filter=filter,
2489-
regex=regex,
2490-
convert=convert,
2460+
to_replace, value, inplace=inplace, regex=regex, convert=convert,
24912461
)
24922462
elif both_lists:
24932463
for to_rep, v in zip(to_replace, value):
24942464
result_blocks = []
24952465
for b in blocks:
24962466
result = b._replace_single(
2497-
to_rep,
2498-
v,
2499-
inplace=inplace,
2500-
filter=filter,
2501-
regex=regex,
2502-
convert=convert,
2467+
to_rep, v, inplace=inplace, regex=regex, convert=convert,
25032468
)
25042469
result_blocks = _extend_blocks(result, result_blocks)
25052470
blocks = result_blocks
@@ -2510,35 +2475,18 @@ def replace(
25102475
result_blocks = []
25112476
for b in blocks:
25122477
result = b._replace_single(
2513-
to_rep,
2514-
value,
2515-
inplace=inplace,
2516-
filter=filter,
2517-
regex=regex,
2518-
convert=convert,
2478+
to_rep, value, inplace=inplace, regex=regex, convert=convert,
25192479
)
25202480
result_blocks = _extend_blocks(result, result_blocks)
25212481
blocks = result_blocks
25222482
return result_blocks
25232483

25242484
return self._replace_single(
2525-
to_replace,
2526-
value,
2527-
inplace=inplace,
2528-
filter=filter,
2529-
convert=convert,
2530-
regex=regex,
2485+
to_replace, value, inplace=inplace, convert=convert, regex=regex,
25312486
)
25322487

25332488
def _replace_single(
2534-
self,
2535-
to_replace,
2536-
value,
2537-
inplace=False,
2538-
filter=None,
2539-
regex=False,
2540-
convert=True,
2541-
mask=None,
2489+
self, to_replace, value, inplace=False, regex=False, convert=True, mask=None,
25422490
):
25432491
"""
25442492
Replace elements by the given value.
@@ -2551,7 +2499,6 @@ def _replace_single(
25512499
Replacement object.
25522500
inplace : bool, default False
25532501
Perform inplace modification.
2554-
filter : list, optional
25552502
regex : bool, default False
25562503
If true, perform regular expression substitution.
25572504
convert : bool, default True
@@ -2597,9 +2544,7 @@ def _replace_single(
25972544
else:
25982545
# if the thing to replace is not a string or compiled regex call
25992546
# the superclass method -> to_replace is some kind of object
2600-
return super().replace(
2601-
to_replace, value, inplace=inplace, filter=filter, regex=regex
2602-
)
2547+
return super().replace(to_replace, value, inplace=inplace, regex=regex)
26032548

26042549
new_values = self.values if inplace else self.values.copy()
26052550

@@ -2624,15 +2569,10 @@ def re_replacer(s):
26242569

26252570
f = np.vectorize(re_replacer, otypes=[self.dtype])
26262571

2627-
if filter is None:
2628-
filt = slice(None)
2629-
else:
2630-
filt = self.mgr_locs.isin(filter).nonzero()[0]
2631-
26322572
if mask is None:
2633-
new_values[filt] = f(new_values[filt])
2573+
new_values[:] = f(new_values)
26342574
else:
2635-
new_values[filt][mask] = f(new_values[filt][mask])
2575+
new_values[mask] = f(new_values[mask])
26362576

26372577
# convert
26382578
block = self.make_block(new_values)
@@ -2729,7 +2669,6 @@ def replace(
27292669
to_replace,
27302670
value,
27312671
inplace: bool = False,
2732-
filter=None,
27332672
regex: bool = False,
27342673
convert: bool = True,
27352674
):

pandas/core/internals/managers.py

+4-17
Original file line numberDiff line numberDiff line change
@@ -369,34 +369,25 @@ def reduce(self, func, *args, **kwargs):
369369

370370
return res
371371

372-
def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T:
372+
def apply(self: T, f, align_keys=None, **kwargs) -> T:
373373
"""
374374
Iterate over the blocks, collect and create a new BlockManager.
375375
376376
Parameters
377377
----------
378378
f : str or callable
379379
Name of the Block method to apply.
380-
filter : list, if supplied, only call the block if the filter is in
381-
the block
382380
383381
Returns
384382
-------
385383
BlockManager
386384
"""
385+
assert "filter" not in kwargs
386+
387387
align_keys = align_keys or []
388-
result_blocks = []
388+
result_blocks: List[Block] = []
389389
# fillna: Series/DataFrame is responsible for making sure value is aligned
390390

391-
# filter kwarg is used in replace-* family of methods
392-
if filter is not None:
393-
filter_locs = set(self.items.get_indexer_for(filter))
394-
if len(filter_locs) == len(self.items):
395-
# All items are included, as if there were no filtering
396-
filter = None
397-
else:
398-
kwargs["filter"] = filter_locs
399-
400391
self._consolidate_inplace()
401392

402393
align_copy = False
@@ -410,10 +401,6 @@ def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T:
410401
}
411402

412403
for b in self.blocks:
413-
if filter is not None:
414-
if not b.mgr_locs.isin(filter_locs).any():
415-
result_blocks.append(b)
416-
continue
417404

418405
if aligned_args:
419406
b_items = self.items[b.mgr_locs.indexer]

pandas/tests/series/methods/test_replace.py

+16
Original file line numberDiff line numberDiff line change
@@ -373,3 +373,19 @@ def test_replace_invalid_to_replace(self):
373373
)
374374
with pytest.raises(TypeError, match=msg):
375375
series.replace(lambda x: x.strip())
376+
377+
def test_replace_only_one_dictlike_arg(self):
    """Series.replace rejects a dict-like on exactly one side (GH#33340)."""
    ser = pd.Series([1, 2, "A", pd.Timestamp.now(), True])

    # (to_replace, value, expected error message), checked in order:
    # first a dict-like to_replace with a scalar value, then the reverse.
    cases = [
        (
            {0: 1, 2: "A"},
            "foo",
            "Series.replace cannot use dict-like to_replace and non-None value",
        ),
        (
            1,
            {0: "foo", 2: "bar"},
            "Series.replace cannot use dict-value and non-None to_replace",
        ),
    ]

    for to_replace, value, msg in cases:
        with pytest.raises(ValueError, match=msg):
            ser.replace(to_replace, value)

0 commit comments

Comments
 (0)