Skip to content

BUG: fixes in replace to deal with block upcasting #3068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 16, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,17 +745,36 @@ def _maybe_promote(dtype, fill_value=np.nan):
return dtype, fill_value


def _maybe_upcast_putmask(result, mask, other, dtype=None):
def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
""" a safe version of put mask that (potentially upcasts the result
return the result and a changed flag """
try:
np.putmask(result, mask, other)
except:
# our type is wrong here, need to upcast
if (-mask).any():
result, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
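return the (possibly upcast) result together with a changed flag
if change is not None and an upcast was needed, then MUTATE change in place (new dtype and values)
the changed flag is True only when the mask selects at least one element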
"""

if mask.any():

def changeit():
# our type is wrong here, need to upcast
if (-mask).any():
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
np.putmask(r, mask, other)

# we need to actually change the dtype here
if change is not None:
change.dtype = r.dtype
change[:] = r

return r, True

new_dtype, fill_value = _maybe_promote(result.dtype,other)
if new_dtype != result.dtype:
return changeit()

try:
np.putmask(result, mask, other)
return result, True
except:
return changeit()

return result, False

Expand Down
66 changes: 28 additions & 38 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3467,14 +3467,21 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
if len(self.columns) == 0:
return self

new_data = self._data
if isinstance(to_replace, dict):
if isinstance(value, dict): # {'A' : NA} -> {'A' : 0}
return self._replace_both_dict(to_replace, value, inplace)
new_data = self._data
for c, src in to_replace.iteritems():
if c in value and c in self:
new_data = new_data.replace(src, value[c], filter = [ c ], inplace=inplace)

elif not isinstance(value, (list, np.ndarray)):
return self._replace_src_dict(to_replace, value, inplace)

raise ValueError('Fill value must be scalar or dict')
new_data = self._data
for k, src in to_replace.iteritems():
if k in self:
new_data = new_data.replace(src, value, filter = [ k ], inplace=inplace)
else:
raise ValueError('Fill value must be scalar or dict')

elif isinstance(to_replace, (list, np.ndarray)):
# [NA, ''] -> [0, 'missing']
Expand All @@ -3491,25 +3498,29 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
new_data = self._data.replace(to_replace, value,
inplace=inplace)

if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)
else:

# dest iterable dict-like
if isinstance(value, dict): # NA -> {'A' : 0, 'B' : -1}
return self._replace_dest_dict(to_replace, value, inplace)

new_data = self._data
for k, v in value.iteritems():
if k in self:
new_data = new_data.replace(to_replace, v, filter = [ k ], inplace=inplace)

elif not isinstance(value, (list, np.ndarray)): # NA -> 0
new_data = self._data.replace(to_replace, value,
inplace=inplace)
if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)
else:
raise ValueError('Invalid to_replace type: %s' %
type(to_replace)) # pragma: no cover


raise ValueError('Invalid to_replace type: %s' %
type(to_replace)) # pragma: no cover
if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)

def _interpolate(self, to_replace, method, axis, inplace, limit):
if self._is_mixed_type and axis == 1:
Expand Down Expand Up @@ -3543,27 +3554,6 @@ def _interpolate(self, to_replace, method, axis, inplace, limit):
else:
return self._constructor(new_data)

def _replace_dest_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for k, v in value.iteritems():
if k in rs:
rs[k].replace(to_replace, v, inplace=True)
return rs if not inplace else None

def _replace_src_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for k, src in to_replace.iteritems():
if k in rs:
rs[k].replace(src, value, inplace=True)
return rs if not inplace else None

def _replace_both_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for c, src in to_replace.iteritems():
if c in value and c in rs:
rs[c].replace(src, value[c], inplace=True)
return rs if not inplace else None

#----------------------------------------------------------------------
# Rename

Expand Down
15 changes: 14 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,10 +880,23 @@ def _verify_integrity(self):
'block items')

def apply(self, f, *args, **kwargs):
""" iterate over the blocks, collect and create a new block manager """
""" iterate over the blocks, collect and create a new block manager

Parameters
----------
f : the callable or function name to operate on at the block level
axes : optional (if not supplied, use self.axes)
filter : list of items, if supplied, only call the block if it holds at least one of the filter's items (other blocks are passed through unchanged)
"""

axes = kwargs.pop('axes',None)
filter = kwargs.pop('filter',None)
result_blocks = []
for blk in self.blocks:
if filter is not None:
if not blk.items.isin(filter).any():
result_blocks.append(blk)
continue
if callable(f):
applied = f(blk, *args, **kwargs)
else:
Expand Down
21 changes: 11 additions & 10 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,13 +732,8 @@ def where(self, cond, other=nan, inplace=False):
if len(other) != len(ser):
raise ValueError('Length of replacements must equal series length')

result, changed = com._maybe_upcast_putmask(ser,~cond,other)
if changed:

# need to actually change ser here
if inplace:
ser.dtype = result.dtype
ser[:] = result
change = ser if inplace else None
result, changed = com._maybe_upcast_putmask(ser,~cond,other,change=change)

return None if inplace else ser

Expand Down Expand Up @@ -2680,11 +2675,17 @@ def replace(self, to_replace, value=None, method='pad', inplace=False,
-------
replaced : Series
"""
result = self.copy() if not inplace else self

if inplace:
result = self
change = self
else:
result = self.copy()
change = None

def _rep_one(s, to_rep, v): # replace single value
mask = com.mask_missing(s.values, to_rep)
np.putmask(s.values, mask, v)
com._maybe_upcast_putmask(s.values,mask,v,change=change)

def _rep_dict(rs, to_rep): # replace {[src] -> dest}

Expand All @@ -2701,7 +2702,7 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest}
masks[d] = com.mask_missing(rs.values, sset)

for d, m in masks.iteritems():
np.putmask(rs.values, m, d)
com._maybe_upcast_putmask(rs.values,m,d,change=change)
else: # if no risk of clobbering then simple
for d, sset in dd.iteritems():
_rep_one(rs, sset, d)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5621,6 +5621,16 @@ def test_replace_mixed(self):
result = df.replace([1,2], ['foo','bar'])
assert_frame_equal(result,expected)

# test case from
from pandas.util.testing import makeCustomDataframe as mkdf
df = DataFrame({'A' : Series([3,0],dtype='int64'), 'B' : Series([0,3],dtype='int64') })
result = df.replace(3, df.mean().to_dict())
expected = df.copy().astype('float64')
m = df.mean()
expected.iloc[0,0] = m[0]
expected.iloc[1,1] = m[1]
assert_frame_equal(result,expected)

def test_replace_interpolate(self):
padded = self.tsframe.replace(nan, method='pad')
assert_frame_equal(padded, self.tsframe.fillna(method='pad'))
Expand Down