Skip to content

Commit 268d8f5

Browse files
committed
PERF: Improve replace perf
1 parent 6dcc238 commit 268d8f5

File tree

4 files changed

+55
-13
lines changed

4 files changed

+55
-13
lines changed

asv_bench/benchmarks/replace.py

+24
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,30 @@ def time_replace_large_dict(self):
3232
self.s.replace(self.to_rep, inplace=True)
3333

3434

35+
class replace_convert(object):
36+
goal_time = 0.5
37+
38+
def setup(self):
39+
self.n = (10 ** 3)
40+
self.to_ts = dict(((i, pd.Timestamp(i)) for i in range(self.n)))
41+
self.to_td = dict(((i, pd.Timedelta(i)) for i in range(self.n)))
42+
self.s = Series(np.random.randint(self.n, size=(10 ** 3)))
43+
self.df = DataFrame({'A': np.random.randint(self.n, size=(10 ** 3)),
44+
'B': np.random.randint(self.n, size=(10 ** 3))})
45+
46+
def time_replace_series_timestamp(self):
47+
self.s.replace(self.to_ts)
48+
49+
def time_replace_series_timedelta(self):
50+
self.s.replace(self.to_td)
51+
52+
def time_replace_frame_timestamp(self):
53+
self.df.replace(self.to_ts)
54+
55+
def time_replace_frame_timedelta(self):
56+
self.df.replace(self.to_td)
57+
58+
3559
class replace_replacena(object):
3660
goal_time = 0.2
3761

doc/source/whatsnew/v0.19.0.txt

+4
Original file line numberDiff line numberDiff line change
@@ -1401,6 +1401,7 @@ Performance Improvements
14011401
- Improved performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`)
14021402
- Improved performance of float64 hash table operations, fixing some very slow indexing and groupby operations in python 3 (:issue:`13166`, :issue:`13334`)
14031403
- Improved performance of ``DataFrameGroupBy.transform`` (:issue:`12737`)
1404+
<<<<<<< 6dcc23862b6b60ce2a67436b4a278fbe4c05490f
14041405
- Improved performance of ``Index`` and ``Series`` ``.duplicated`` (:issue:`10235`)
14051406
- Improved performance of ``Index.difference`` (:issue:`12044`)
14061407
- Improved performance of ``RangeIndex.is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`13749`)
@@ -1410,6 +1411,9 @@ Performance Improvements
14101411
- Improved performance by lazily creating indexing hashtables on larger Indexes (:issue:`14266`)
14111412
- Improved performance of ``groupby.groups`` (:issue:`14293`)
14121413
- Avoided unnecessary materializing of a MultiIndex when introspecting for memory usage (:issue:`14308`)
1414+
=======
1415+
- Improved performance of ``.replace()`` (:issue:`12745`)
1416+
>>>>>>> PERF: Improve replace perf
14131417

14141418
.. _whatsnew_0190.bug_fixes:
14151419

pandas/core/generic.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -3477,20 +3477,27 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
34773477
res = self if inplace else self.copy()
34783478
for c, src in compat.iteritems(to_replace):
34793479
if c in value and c in self:
3480+
# object conversion is handled in
3481+
# series.replace which is called recursively
34803482
res[c] = res[c].replace(to_replace=src,
34813483
value=value[c],
3482-
inplace=False, regex=regex)
3484+
inplace=False,
3485+
regex=regex)
34833486
return None if inplace else res
34843487

34853488
# {'A': NA} -> 0
34863489
elif not is_list_like(value):
3487-
for k, src in compat.iteritems(to_replace):
3488-
if k in self:
3489-
new_data = new_data.replace(to_replace=src,
3490-
value=value,
3491-
filter=[k],
3492-
inplace=inplace,
3493-
regex=regex)
3490+
keys = [(k, src) for k, src in compat.iteritems(to_replace)
3491+
if k in self]
3492+
keys_len = len(keys) - 1
3493+
for i, (k, src) in enumerate(keys):
3494+
convert = i == keys_len
3495+
new_data = new_data.replace(to_replace=src,
3496+
value=value,
3497+
filter=[k],
3498+
inplace=inplace,
3499+
regex=regex,
3500+
convert=convert)
34943501
else:
34953502
raise TypeError('value argument must be scalar, dict, or '
34963503
'Series')

pandas/core/internals.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,6 @@ def replace(self, to_replace, value, inplace=False, filter=None,
623623

624624
original_to_replace = to_replace
625625
mask = isnull(self.values)
626-
627626
# try to replace, if we raise an error, convert to ObjectBlock and
628627
# retry
629628
try:
@@ -1773,13 +1772,14 @@ def should_store(self, value):
17731772
return issubclass(value.dtype.type, np.bool_)
17741773

17751774
def replace(self, to_replace, value, inplace=False, filter=None,
1776-
regex=False, mgr=None):
1775+
regex=False, convert=True, mgr=None):
17771776
to_replace_values = np.atleast_1d(to_replace)
17781777
if not np.can_cast(to_replace_values, bool):
17791778
return self
17801779
return super(BoolBlock, self).replace(to_replace, value,
17811780
inplace=inplace, filter=filter,
1782-
regex=regex, mgr=mgr)
1781+
regex=regex, convert=convert,
1782+
mgr=mgr)
17831783

17841784

17851785
class ObjectBlock(Block):
@@ -3192,6 +3192,7 @@ def comp(s):
31923192
masks = [comp(s) for i, s in enumerate(src_list)]
31933193

31943194
result_blocks = []
3195+
src_len = len(src_list) - 1
31953196
for blk in self.blocks:
31963197

31973198
# it's possible to get multiple result blocks here
@@ -3201,8 +3202,9 @@ def comp(s):
32013202
new_rb = []
32023203
for b in rb:
32033204
if b.dtype == np.object_:
3205+
convert = i == src_len
32043206
result = b.replace(s, d, inplace=inplace, regex=regex,
3205-
mgr=mgr)
3207+
mgr=mgr, convert=convert)
32063208
new_rb = _extend_blocks(result, new_rb)
32073209
else:
32083210
# get our mask for this element, sized to this
@@ -4766,7 +4768,12 @@ def _putmask_smart(v, m, n):
47664768

47674769
# change the dtype
47684770
dtype, _ = _maybe_promote(n.dtype)
4769-
nv = v.astype(dtype)
4771+
4772+
if needs_i8_conversion(v.dtype) and is_object_dtype(dtype):
4773+
nv = v.get_values(dtype)
4774+
else:
4775+
nv = v.astype(dtype)
4776+
47704777
try:
47714778
nv[m] = n[m]
47724779
except ValueError:

0 commit comments

Comments
 (0)