Skip to content

Commit 4a6fd01

Browse files
author
Chang She
committed
BUG: DataFrame.replace later values clobber earlier ones
1 parent 47520f4 commit 4a6fd01

File tree

4 files changed

+48
-10
lines changed

4 files changed

+48
-10
lines changed

pandas/core/frame.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -2896,8 +2896,7 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
28962896
(len(to_replace), len(value)))
28972897

28982898
new_data = self._data if inplace else self.copy()._data
2899-
for s, d in zip(to_replace, value):
2900-
new_data = new_data.replace(s, d, inplace=True)
2899+
new_data._replace_list(to_replace, value)
29012900

29022901
else: # [np.nan, ''] -> 0
29032902
new_data = self._data.replace(to_replace, value,

pandas/core/internals.py

+26
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,16 @@ def replace(self, to_replace, value, inplace=False):
253253
else:
254254
return make_block(new_values, self.items, self.ref_items)
255255

256+
def putmask(self, mask, new, inplace=False):
    """
    Write ``new`` into this block's values wherever ``mask`` is set.

    Parameters
    ----------
    mask : boolean array
        Passed straight to ``np.putmask`` together with the block values.
    new : object
        Replacement value. It is written only if
        ``self._can_hold_element(new)`` is true, after being converted
        with ``self._try_cast``.
    inplace : bool, default False
        If True, operate on ``self.values`` directly; otherwise operate
        on a copy of it.

    Returns
    -------
    ``self`` when ``inplace`` is True, otherwise a new block built with
    ``make_block`` from the (possibly modified) copy.
    """
    new_values = self.values if inplace else self.values.copy()

    # A block that cannot hold ``new`` is left untouched rather than
    # raising, so callers can apply the same replacement to every block.
    if self._can_hold_element(new):
        new = self._try_cast(new)
        np.putmask(new_values, mask, new)

    if inplace:
        return self
    return make_block(new_values, self.items, self.ref_items)
265+
256266
def interpolate(self, method='pad', axis=0, inplace=False,
257267
limit=None, missing=None):
258268
values = self.values if inplace else self.values.copy()
@@ -1127,6 +1137,22 @@ def replace(self, to_replace, value, inplace=False):
11271137
return self
11281138
return BlockManager(new_blocks, self.axes)
11291139

1140+
def _replace_list(self, src_lst, dest_lst):
1141+
sset = set(src_lst)
1142+
if any([k in sset for k in dest_lst]):
1143+
masks = {}
1144+
for s in src_lst:
1145+
masks[s] = [b.values == s for b in self.blocks]
1146+
1147+
for s, d in zip(src_lst, dest_lst):
1148+
[b.putmask(masks[s][i], d, inplace=True) for i, b in
1149+
enumerate(self.blocks)]
1150+
else:
1151+
for s, d in zip(src_lst, dest_lst):
1152+
self.replace(s, d, inplace=True)
1153+
1154+
return self
1155+
11301156
@property
11311157
def block_id_vector(self):
11321158
# TODO

pandas/core/series.py

+16-8
Original file line number | Diff line number | Diff line change
@@ -2239,15 +2239,23 @@ def _rep_one(s, to_rep, v): # replace single value
22392239

22402240
def _rep_dict(rs, to_rep): # replace {[src] -> dest}
22412241

2242+
all_src = set()
22422243
dd = {} # group by unique destination value
2243-
[dd.setdefault(d, []).append(s) for s, d in to_rep.iteritems()]
2244-
2245-
masks = {}
2246-
for d, sset in dd.iteritems(): # now replace by each dest
2247-
masks[d] = com.mask_missing(rs.values, sset)
2248-
2249-
for d, m in masks.iteritems():
2250-
np.putmask(rs.values, m, d)
2244+
for s, d in to_rep.iteritems():
2245+
dd.setdefault(d, []).append(s)
2246+
all_src.add(s)
2247+
2248+
if any(d in all_src for d in dd.keys()):
2249+
# don't clobber each other at the cost of temporaries
2250+
masks = {}
2251+
for d, sset in dd.iteritems(): # now replace by each dest
2252+
masks[d] = com.mask_missing(rs.values, sset)
2253+
2254+
for d, m in masks.iteritems():
2255+
np.putmask(rs.values, m, d)
2256+
else: # if no risk of clobbering then simple
2257+
for d, sset in dd.iteritems():
2258+
_rep_one(rs, sset, d)
22512259
return rs
22522260

22532261
if np.isscalar(to_replace):

pandas/tests/test_frame.py

+5
Original file line number | Diff line number | Diff line change
@@ -3974,6 +3974,11 @@ def test_replace_input_formats(self):
39743974
expected[k] = v.replace(to_rep[k], values[k])
39753975
assert_frame_equal(filled, DataFrame(expected))
39763976

3977+
result = df.replace([0, 2, 5], [5, 2, 0])
3978+
expected = DataFrame({'A' : [np.nan, 5, np.inf], 'B' : [5, 2, 0],
3979+
'C' : ['', 'asdf', 'fd']})
3980+
assert_frame_equal(result, expected)
3981+
39773982
# dict to scalar
39783983
filled = df.replace(to_rep, 0)
39793984
expected = {}

0 commit comments

Comments
 (0)