Skip to content

Commit 32ad737

Browse files
committed
Merge pull request #3068 from jreback/replace2
BUG: fixes in replace to deal with block upcasting
2 parents fbfd16a + 1cae798 commit 32ad737

File tree

5 files changed

+91
-58
lines changed

5 files changed

+91
-58
lines changed

pandas/core/common.py

+28-9
Original file line numberDiff line numberDiff line change
@@ -745,17 +745,36 @@ def _maybe_promote(dtype, fill_value=np.nan):
745745
return dtype, fill_value
746746

747747

748-
def _maybe_upcast_putmask(result, mask, other, dtype=None):
748+
def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
749749
""" a safe version of put mask that (potentially) upcasts the result
750-
return the result and a changed flag """
751-
try:
752-
np.putmask(result, mask, other)
753-
except:
754-
# our type is wrong here, need to upcast
755-
if (-mask).any():
756-
result, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
750+
return the result
751+
if change is not None, then MUTATE change in place (including its dtype)
752+
return a changed flag
753+
"""
754+
755+
if mask.any():
756+
757+
def changeit():
758+
# our type is wrong here, need to upcast
759+
if (-mask).any():
760+
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
761+
np.putmask(r, mask, other)
762+
763+
# we need to actually change the dtype here
764+
if change is not None:
765+
change.dtype = r.dtype
766+
change[:] = r
767+
768+
return r, True
769+
770+
new_dtype, fill_value = _maybe_promote(result.dtype,other)
771+
if new_dtype != result.dtype:
772+
return changeit()
773+
774+
try:
757775
np.putmask(result, mask, other)
758-
return result, True
776+
except:
777+
return changeit()
759778

760779
return result, False
761780

pandas/core/frame.py

+28-38
Original file line numberDiff line numberDiff line change
@@ -3467,14 +3467,21 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
34673467
if len(self.columns) == 0:
34683468
return self
34693469

3470+
new_data = self._data
34703471
if isinstance(to_replace, dict):
34713472
if isinstance(value, dict): # {'A' : NA} -> {'A' : 0}
3472-
return self._replace_both_dict(to_replace, value, inplace)
3473+
new_data = self._data
3474+
for c, src in to_replace.iteritems():
3475+
if c in value and c in self:
3476+
new_data = new_data.replace(src, value[c], filter = [ c ], inplace=inplace)
34733477

34743478
elif not isinstance(value, (list, np.ndarray)):
3475-
return self._replace_src_dict(to_replace, value, inplace)
3476-
3477-
raise ValueError('Fill value must be scalar or dict')
3479+
new_data = self._data
3480+
for k, src in to_replace.iteritems():
3481+
if k in self:
3482+
new_data = new_data.replace(src, value, filter = [ k ], inplace=inplace)
3483+
else:
3484+
raise ValueError('Fill value must be scalar or dict')
34783485

34793486
elif isinstance(to_replace, (list, np.ndarray)):
34803487
# [NA, ''] -> [0, 'missing']
@@ -3491,25 +3498,29 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
34913498
new_data = self._data.replace(to_replace, value,
34923499
inplace=inplace)
34933500

3494-
if inplace:
3495-
self._data = new_data
3496-
return self
3497-
else:
3498-
return self._constructor(new_data)
34993501
else:
3502+
3503+
# dest iterable dict-like
35003504
if isinstance(value, dict): # NA -> {'A' : 0, 'B' : -1}
3501-
return self._replace_dest_dict(to_replace, value, inplace)
3505+
3506+
new_data = self._data
3507+
for k, v in value.iteritems():
3508+
if k in self:
3509+
new_data = new_data.replace(to_replace, v, filter = [ k ], inplace=inplace)
3510+
35023511
elif not isinstance(value, (list, np.ndarray)): # NA -> 0
35033512
new_data = self._data.replace(to_replace, value,
35043513
inplace=inplace)
3505-
if inplace:
3506-
self._data = new_data
3507-
return self
3508-
else:
3509-
return self._constructor(new_data)
3514+
else:
3515+
raise ValueError('Invalid to_replace type: %s' %
3516+
type(to_replace)) # pragma: no cover
3517+
35103518

3511-
raise ValueError('Invalid to_replace type: %s' %
3512-
type(to_replace)) # pragma: no cover
3519+
if inplace:
3520+
self._data = new_data
3521+
return self
3522+
else:
3523+
return self._constructor(new_data)
35133524

35143525
def _interpolate(self, to_replace, method, axis, inplace, limit):
35153526
if self._is_mixed_type and axis == 1:
@@ -3543,27 +3554,6 @@ def _interpolate(self, to_replace, method, axis, inplace, limit):
35433554
else:
35443555
return self._constructor(new_data)
35453556

3546-
def _replace_dest_dict(self, to_replace, value, inplace):
3547-
rs = self if inplace else self.copy()
3548-
for k, v in value.iteritems():
3549-
if k in rs:
3550-
rs[k].replace(to_replace, v, inplace=True)
3551-
return rs if not inplace else None
3552-
3553-
def _replace_src_dict(self, to_replace, value, inplace):
3554-
rs = self if inplace else self.copy()
3555-
for k, src in to_replace.iteritems():
3556-
if k in rs:
3557-
rs[k].replace(src, value, inplace=True)
3558-
return rs if not inplace else None
3559-
3560-
def _replace_both_dict(self, to_replace, value, inplace):
3561-
rs = self if inplace else self.copy()
3562-
for c, src in to_replace.iteritems():
3563-
if c in value and c in rs:
3564-
rs[c].replace(src, value[c], inplace=True)
3565-
return rs if not inplace else None
3566-
35673557
#----------------------------------------------------------------------
35683558
# Rename
35693559

pandas/core/internals.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -880,10 +880,23 @@ def _verify_integrity(self):
880880
'block items')
881881

882882
def apply(self, f, *args, **kwargs):
883-
""" iterate over the blocks, collect and create a new block manager """
883+
""" iterate over the blocks, collect and create a new block manager
884+
885+
Parameters
886+
----------
887+
f : the callable or function name to operate on at the block level
888+
axes : optional (if not supplied, use self.axes)
889+
filter : list of items, if supplied, only call f on blocks that contain at least one of these items (other blocks are passed through unchanged)
890+
"""
891+
884892
axes = kwargs.pop('axes',None)
893+
filter = kwargs.pop('filter',None)
885894
result_blocks = []
886895
for blk in self.blocks:
896+
if filter is not None:
897+
if not blk.items.isin(filter).any():
898+
result_blocks.append(blk)
899+
continue
887900
if callable(f):
888901
applied = f(blk, *args, **kwargs)
889902
else:

pandas/core/series.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -732,13 +732,8 @@ def where(self, cond, other=nan, inplace=False):
732732
if len(other) != len(ser):
733733
raise ValueError('Length of replacements must equal series length')
734734

735-
result, changed = com._maybe_upcast_putmask(ser,~cond,other)
736-
if changed:
737-
738-
# need to actually change ser here
739-
if inplace:
740-
ser.dtype = result.dtype
741-
ser[:] = result
735+
change = ser if inplace else None
736+
result, changed = com._maybe_upcast_putmask(ser,~cond,other,change=change)
742737

743738
return None if inplace else ser
744739

@@ -2680,11 +2675,17 @@ def replace(self, to_replace, value=None, method='pad', inplace=False,
26802675
-------
26812676
replaced : Series
26822677
"""
2683-
result = self.copy() if not inplace else self
2678+
2679+
if inplace:
2680+
result = self
2681+
change = self
2682+
else:
2683+
result = self.copy()
2684+
change = None
26842685

26852686
def _rep_one(s, to_rep, v): # replace single value
26862687
mask = com.mask_missing(s.values, to_rep)
2687-
np.putmask(s.values, mask, v)
2688+
com._maybe_upcast_putmask(s.values,mask,v,change=change)
26882689

26892690
def _rep_dict(rs, to_rep): # replace {[src] -> dest}
26902691

@@ -2701,7 +2702,7 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest}
27012702
masks[d] = com.mask_missing(rs.values, sset)
27022703

27032704
for d, m in masks.iteritems():
2704-
np.putmask(rs.values, m, d)
2705+
com._maybe_upcast_putmask(rs.values,m,d,change=change)
27052706
else: # if no risk of clobbering then simple
27062707
for d, sset in dd.iteritems():
27072708
_rep_one(rs, sset, d)

pandas/tests/test_frame.py

+10
Original file line numberDiff line numberDiff line change
@@ -5621,6 +5621,16 @@ def test_replace_mixed(self):
56215621
result = df.replace([1,2], ['foo','bar'])
56225622
assert_frame_equal(result,expected)
56235623

5624+
# test case from
5625+
from pandas.util.testing import makeCustomDataframe as mkdf
5626+
df = DataFrame({'A' : Series([3,0],dtype='int64'), 'B' : Series([0,3],dtype='int64') })
5627+
result = df.replace(3, df.mean().to_dict())
5628+
expected = df.copy().astype('float64')
5629+
m = df.mean()
5630+
expected.iloc[0,0] = m[0]
5631+
expected.iloc[1,1] = m[1]
5632+
assert_frame_equal(result,expected)
5633+
56245634
def test_replace_interpolate(self):
56255635
padded = self.tsframe.replace(nan, method='pad')
56265636
assert_frame_equal(padded, self.tsframe.fillna(method='pad'))

0 commit comments

Comments
 (0)