Skip to content

Commit 125ad73

Browse files
Chang Shewesm
Chang She
authored and committed
removed bottleneck calls from replace
1 parent 1f2fb96 commit 125ad73

File tree

9 files changed

+84
-1277
lines changed

9 files changed

+84
-1277
lines changed

pandas/core/common.py

+31
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,37 @@ def notnull(obj):
9797
return not res
9898
return -res
9999

100+
def mask_missing(arr, values_to_mask):
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : array exposing ``.dtype`` and supporting elementwise ``==``
    values_to_mask : scalar or list-like
        Values to look for in ``arr``. If any member is flagged by
        ``isnull``, every entry of ``arr`` flagged by ``isnull`` is
        masked as well.

    Returns
    -------
    mask : boolean array
        All False when ``values_to_mask`` contains nothing to match.
    """
    if np.isscalar(values_to_mask):
        values_to_mask = [values_to_mask]

    try:
        values_to_mask = np.array(values_to_mask, dtype=arr.dtype)
    except Exception:
        # Values that cannot be coerced to arr's dtype are kept as objects
        # so they can still be compared element by element.
        values_to_mask = np.array(values_to_mask, dtype=object)

    na_mask = isnull(values_to_mask)
    # Use ``~`` (logical NOT) instead of unary minus: newer NumPy releases
    # raise TypeError for ``-`` applied to a boolean array.
    nonna = values_to_mask[~na_mask]

    mask = None
    for x in nonna:
        if mask is None:
            mask = arr == x
        else:
            mask = mask | (arr == x)

    if na_mask.any():
        if mask is None:
            mask = isnull(arr)
        else:
            mask = mask | isnull(arr)

    if mask is None:
        # Nothing to match (e.g. an empty values_to_mask): honour the
        # documented contract and return an all-False mask rather than None,
        # so callers can pass the result straight to np.putmask.
        mask = np.zeros(np.shape(arr), dtype=bool)

    return mask
130+
100131
def _pickle_array(arr):
101132
arr = arr.view(np.ndarray)
102133

pandas/core/frame.py

+44-37
Original file line numberDiff line numberDiff line change
@@ -2649,44 +2649,14 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
26492649
self._consolidate_inplace()
26502650

26512651
if value is None:
2652-
if self._is_mixed_type and axis == 1:
2653-
return self.T.replace(to_replace, method=method, limit=limit).T
2654-
2655-
method = com._clean_fill_method(method)
2656-
2657-
if isinstance(to_replace, dict):
2658-
if axis == 1:
2659-
return self.T.replace(to_replace, method=method,
2660-
limit=limit).T
2661-
2662-
rs = self if inplace else self.copy()
2663-
for k, v in to_replace.iteritems():
2664-
if k in rs:
2665-
rs[k].replace(v, method=method, limit=limit,
2666-
inplace=True)
2667-
return rs
2668-
2669-
else:
2670-
new_blocks = []
2671-
for block in self._data.blocks:
2672-
newb = block.interpolate(method, axis=axis,
2673-
limit=limit, inplace=inplace,
2674-
missing=to_replace)
2675-
new_blocks.append(newb)
2676-
new_data = BlockManager(new_blocks, self._data.axes)
2677-
2678-
if inplace:
2679-
self._data = new_data
2680-
return self
2681-
else:
2682-
return self._constructor(new_data)
2683-
2652+
return self._interpolate(to_replace, method, axis, inplace, limit)
26842653
else:
26852654
# Float type values
26862655
if len(self.columns) == 0:
26872656
return self
26882657

26892658
if np.isscalar(to_replace):
2659+
26902660
if np.isscalar(value): # np.nan -> 0
26912661
new_data = self._data.replace(to_replace, value,
26922662
inplace=inplace)
@@ -2699,14 +2669,17 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
26992669
elif isinstance(value, dict): # np.nan -> {'A' : 0, 'B' : -1}
27002670
return self._replace_dest_dict(to_replace, value, inplace)
27012671

2672+
27022673
elif isinstance(to_replace, dict):
2674+
27032675
if np.isscalar(value): # {'A' : np.nan, 'B' : ''} -> 0
27042676
return self._replace_src_dict(to_replace, value, inplace)
2677+
27052678
elif isinstance(value, dict): # {'A' : np.nan} -> {'A' : 0}
27062679
return self._replace_both_dict(to_replace, value, inplace)
2707-
else:
2708-
raise ValueError('Fill value must be scalar or dict')
2709-
return rs
2680+
2681+
raise ValueError('Fill value must be scalar or dict')
2682+
27102683

27112684
elif isinstance(to_replace, (list, np.ndarray)):
27122685
# [np.nan, ''] -> [0, 'missing']
@@ -2723,14 +2696,48 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
27232696
else: # [np.nan, ''] -> 0
27242697
new_data = self._data.replace(to_replace, value,
27252698
inplace=inplace)
2699+
27262700
if inplace:
27272701
self._data = new_data
27282702
return self
27292703
else:
27302704
return self._constructor(new_data)
2705+
2706+
raise ValueError('Invalid to_replace type: %s' % type(to_replace))
2707+
2708+
def _interpolate(self, to_replace, method, axis, inplace, limit):
2709+
if self._is_mixed_type and axis == 1:
2710+
return self.T.replace(to_replace, method=method, limit=limit).T
2711+
2712+
method = com._clean_fill_method(method)
2713+
2714+
if isinstance(to_replace, dict):
2715+
if axis == 1:
2716+
return self.T.replace(to_replace, method=method,
2717+
limit=limit).T
2718+
2719+
rs = self if inplace else self.copy()
2720+
for k, v in to_replace.iteritems():
2721+
if k in rs:
2722+
rs[k].replace(v, method=method, limit=limit,
2723+
inplace=True)
2724+
return rs
2725+
2726+
else:
2727+
new_blocks = []
2728+
for block in self._data.blocks:
2729+
newb = block.interpolate(method, axis=axis,
2730+
limit=limit, inplace=inplace,
2731+
missing=to_replace)
2732+
new_blocks.append(newb)
2733+
new_data = BlockManager(new_blocks, self._data.axes)
2734+
2735+
if inplace:
2736+
self._data = new_data
2737+
return self
27312738
else:
2732-
raise ValueError('Invalid to_replace type: %s' %
2733-
type(to_replace))
2739+
return self._constructor(new_data)
2740+
27342741

27352742
def _replace_dest_dict(self, to_replace, value, inplace):
27362743
rs = self if inplace else self.copy()

pandas/core/internals.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -232,17 +232,21 @@ def replace(self, to_replace, value, inplace=False):
232232
if np.isscalar(to_replace):
233233
if self._can_hold_element(to_replace):
234234
to_replace = self._try_cast(to_replace)
235-
lib.replace(new_values, to_replace, value)
235+
np.putmask(new_values, com.mask_missing(new_values, to_replace),
236+
value)
236237
else:
237238
try:
238239
to_replace = np.array(to_replace, dtype=self.dtype)
239-
lib.replace(new_values, to_replace, value)
240+
np.putmask(new_values, com.mask_missing(new_values, to_replace),
241+
value)
240242
except:
241243
to_replace = np.array(to_replace, dtype=object)
242244
for r in to_replace:
243245
if self._can_hold_element(r):
244246
r = self._try_cast(r)
245-
lib.replace(new_values, r, value)
247+
np.putmask(new_values, com.mask_missing(new_values, to_replace),
248+
value)
249+
246250
if inplace:
247251
return self
248252
else:

pandas/core/nanops.py

-1
Original file line numberDiff line numberDiff line change
@@ -412,4 +412,3 @@ def unique1d(values):
412412
uniques = table.unique(com._ensure_object(values))
413413
uniques = lib.list_to_object_array(uniques)
414414
return uniques
415-

pandas/core/series.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -2131,10 +2131,8 @@ def replace(self, to_replace, value=None, method='pad', inplace=False,
21312131
result = self.copy() if not inplace else self
21322132

21332133
def _rep_one(s, to_rep, v): # replace single value
2134-
if isinstance(to_rep, (list, np.ndarray)):
2135-
to_rep = lib.maybe_convert_objects(np.array(to_rep,
2136-
dtype=object))
2137-
lib.replace(s.values, to_rep, v)
2134+
mask = com.mask_missing(s.values, to_rep)
2135+
np.putmask(s.values, mask, v)
21382136
return s
21392137

21402138
def _rep_dict(rs, to_rep): # replace {[src] -> dest}

pandas/src/codegen_replace.py

-187
This file was deleted.

0 commit comments

Comments
 (0)