Skip to content

Commit bb7869d

Browse files
Chang She authored and wesm committed
ENH: Series.replace #929
1 parent 580adc9 commit bb7869d

File tree

3 files changed

+148
-16
lines changed

3 files changed

+148
-16
lines changed

pandas/core/common.py

+22-8
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ def wrapper(arr, mask, limit=None):
372372
_backfill_1d_datetime = _interp_wrapper(_algos.backfill_inplace_int64, np.int64)
373373
_backfill_2d_datetime = _interp_wrapper(_algos.backfill_2d_inplace_int64, np.int64)
374374

375-
def pad_1d(values, limit=None):
375+
def pad_1d(values, limit=None, mask=None):
376376
if is_float_dtype(values):
377377
_method = _algos.pad_inplace_float64
378378
elif is_datetime64_dtype(values):
@@ -382,9 +382,12 @@ def pad_1d(values, limit=None):
382382
else: # pragma: no cover
383383
raise ValueError('Invalid dtype for padding')
384384

385-
_method(values, isnull(values).view(np.uint8), limit=limit)
385+
if mask is None:
386+
mask = isnull(values)
387+
mask = mask.view(np.uint8)
388+
_method(values, mask, limit=limit)
386389

387-
def backfill_1d(values, limit=None):
390+
def backfill_1d(values, limit=None, mask=None):
388391
if is_float_dtype(values):
389392
_method = _algos.backfill_inplace_float64
390393
elif is_datetime64_dtype(values):
@@ -394,9 +397,13 @@ def backfill_1d(values, limit=None):
394397
else: # pragma: no cover
395398
raise ValueError('Invalid dtype for padding')
396399

397-
_method(values, isnull(values).view(np.uint8), limit=limit)
400+
if mask is None:
401+
mask = isnull(values)
402+
mask = mask.view(np.uint8)
398403

399-
def pad_2d(values, limit=None):
404+
_method(values, mask, limit=limit)
405+
406+
def pad_2d(values, limit=None, mask=None):
400407
if is_float_dtype(values):
401408
_method = _algos.pad_2d_inplace_float64
402409
elif is_datetime64_dtype(values):
@@ -406,9 +413,13 @@ def pad_2d(values, limit=None):
406413
else: # pragma: no cover
407414
raise ValueError('Invalid dtype for padding')
408415

409-
_method(values, isnull(values).view(np.uint8), limit=limit)
416+
if mask is None:
417+
mask = isnull(values)
418+
mask = mask.view(np.uint8)
419+
420+
_method(values, mask, limit=limit)
410421

411-
def backfill_2d(values, limit=None):
422+
def backfill_2d(values, limit=None, mask=None):
412423
if is_float_dtype(values):
413424
_method = _algos.backfill_2d_inplace_float64
414425
elif is_datetime64_dtype(values):
@@ -418,8 +429,11 @@ def backfill_2d(values, limit=None):
418429
else: # pragma: no cover
419430
raise ValueError('Invalid dtype for padding')
420431

421-
_method(values, isnull(values).view(np.uint8), limit=limit)
432+
if mask is None:
433+
mask = isnull(values)
434+
mask = mask.view(np.uint8)
422435

436+
_method(values, mask, limit=limit)
423437

424438
def _consensus_name_attr(objs):
425439
name = objs[0].name

pandas/core/series.py

+103-5
Original file line numberDiff line numberDiff line change
@@ -2078,11 +2078,7 @@ def fillna(self, value=None, method='pad', inplace=False,
20782078
if method is None: # pragma: no cover
20792079
raise ValueError('must specify a fill method')
20802080

2081-
method = com._clean_fill_method(method)
2082-
if method == 'pad':
2083-
fill_f = com.pad_1d
2084-
elif method == 'backfill':
2085-
fill_f = com.backfill_1d
2081+
fill_f = _get_fill_func(method)
20862082

20872083
if inplace:
20882084
values = self.values
@@ -2098,6 +2094,91 @@ def fillna(self, value=None, method='pad', inplace=False,
20982094

20992095
return result
21002096

2097+
2098+
def replace(self, to_replace=None, value=None, method='pad', inplace=False,
            limit=None):
    """
    Replace arbitrary values in a Series

    Parameters
    ----------
    to_replace : list, ndarray or dict, default None
        list of values to be replaced, or dict mapping each value to be
        replaced to its replacement
    value : anything
        if to_replace is a list then value is the replacement value. If
        value is itself a list or ndarray it must have the same length as
        to_replace and the two are paired element by element. Ignored
        when to_replace is a dict
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
        Only used when to_replace is a list and value is None: the
        matched entries are then filled from their neighbours
        pad / ffill: propagate last valid observation forward to next valid
        backfill / bfill: use NEXT valid observation to fill gap
    inplace : boolean, default False
        If True, modify the Series in place. Note: this will modify any
        other views on this Series, for example a column in a DataFrame.
        Returns a reference to the replaced object, which is self if
        inplace=True
    limit : int, default None
        Maximum size gap to forward or backward fill (passed through to
        the fill function; only relevant when method is used)

    Notes
    -----
    replace does not distinguish between NaN and None

    See also
    --------
    fillna, reindex, asfreq

    Returns
    -------
    replaced : Series

    Raises
    ------
    ValueError
        If to_replace and value are sequences of different lengths, if a
        fill is requested with method=None, or if to_replace is neither
        a dict nor a list/ndarray
    """
    result = self if inplace else self.copy()

    def _rep_one(s, to_rep, v):  # replace every value in to_rep with v
        m = _mask_missing(s, to_rep)
        np.putmask(s, m, v)
        return s

    def _rep_dict(rs, to_rep):  # replace {[src] -> dest}
        # group the sources by unique destination value so that each
        # destination needs only one masking pass
        dd = {}
        for s, d in to_rep.iteritems():
            dd.setdefault(d, []).append(s)

        for d, sset in dd.iteritems():  # now replace by each dest
            rs = _rep_one(rs, sset, d)
        return rs

    if isinstance(to_replace, dict):
        return _rep_dict(result, to_replace)

    if isinstance(to_replace, (list, np.ndarray)):

        if isinstance(value, (list, np.ndarray)):  # check same length
            vl, rl = len(value), len(to_replace)
            if vl == rl:
                return _rep_dict(result, dict(zip(to_replace, value)))
            raise ValueError('Got %d to replace but %d values' % (rl, vl))

        elif value is not None:  # otherwise all replaced with same value
            return _rep_one(result, to_replace, value)

        else:  # no value given: fill the matched entries using method
            if method is None:  # pragma: no cover
                raise ValueError('must specify a fill method')
            fill_f = _get_fill_func(method)

            mask = _mask_missing(result, to_replace)
            fill_f(result.values, limit=limit, mask=mask)

            if not inplace:
                result = Series(result.values, index=self.index,
                                name=self.name)
            return result

    raise ValueError('Unrecognized to_replace type %s' %
                     type(to_replace))
2181+
21012182
def isin(self, values):
21022183
"""
21032184
Return boolean vector showing whether each element in the Series is
@@ -2549,6 +2630,23 @@ def _resolve_offset(freq, kwds):
25492630

25502631
return offset
25512632

2633+
def _get_fill_func(method):
    """
    Return the 1-d fill function matching a fill method name

    Parameters
    ----------
    method : str
        Fill method name; it is normalized with com._clean_fill_method
        before being matched

    Returns
    -------
    fill_f : function
        com.pad_1d for 'pad', com.backfill_1d for 'backfill'

    Raises
    ------
    ValueError
        If the normalized method is neither 'pad' nor 'backfill'
    """
    method = com._clean_fill_method(method)
    if method == 'pad':
        return com.pad_1d
    if method == 'backfill':
        return com.backfill_1d
    # NOTE(review): com._clean_fill_method is defined outside this view; if
    # it already rejects unknown names this branch is just a safety net.
    # Without it an unknown name fell through to an unbound local.
    raise ValueError('Invalid fill method: %r' % (method,))
2640+
2641+
def _mask_missing(series, missing_values):
2642+
missing_values = np.array(list(missing_values), dtype=object)
2643+
if isnull(missing_values).any():
2644+
missing_values = missing_values[notnull(missing_values)]
2645+
mask = isnull(series) | series.isin(missing_values)
2646+
else:
2647+
mask = series.isin(missing_values)
2648+
return mask
2649+
25522650

25532651
#----------------------------------------------------------------------
25542652
# Add plotting methods to Series

pandas/tests/test_series.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -2649,23 +2649,43 @@ def test_timeseries_coercion(self):
26492649

26502650
def test_replace(self):
26512651
N = 100
2652-
ser = Series(np.fabs(np.random.randn(len(N))), tm.makeDataIndex(N))
2652+
ser = Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N),
2653+
dtype=object)
26532654
ser[:5] = np.nan
26542655
ser[6:10] = 'foo'
26552656
ser[20:30] = 'bar'
26562657

2658+
# replace list with a single value
26572659
rs = ser.replace([np.nan, 'foo', 'bar'], -1)
2660+
26582661
self.assert_((rs[:5] == -1).all())
26592662
self.assert_((rs[6:10] == -1).all())
26602663
self.assert_((rs[20:30] == -1).all())
2661-
self.assert_((ser >= 0).all())
2664+
self.assert_((isnull(ser[:5])).all())
26622665

2666+
# replace with different values
26632667
rs = ser.replace({np.nan : -1, 'foo' : -2, 'bar' : -3})
2668+
26642669
self.assert_((rs[:5] == -1).all())
26652670
self.assert_((rs[6:10] == -2).all())
26662671
self.assert_((rs[20:30] == -3).all())
2667-
self.assert_((ser >= 0).all())
2672+
self.assert_((isnull(ser[:5])).all())
2673+
2674+
# replace with different values with 2 lists
2675+
rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
2676+
assert_series_equal(rs, rs2)
2677+
2678+
# replace with forward fill not considering np.nan missing
2679+
s2 = ser.copy()
2680+
s2[5] = np.nan
2681+
rs3 = s2.replace(['foo', 'bar'])
2682+
self.assert_(isnull(rs3[6]))
2683+
2684+
# replace with back fill considering np.nan as missing
2685+
rs4 = ser.replace([np.nan, 'foo', 'bar'], method='bfill')
2686+
assert_almost_equal(rs4[4], ser[5])
26682687

2688+
# replace inplace
26692689
ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
26702690
self.assert_((ser[:5] == -1).all())
26712691
self.assert_((ser[6:10] == -1).all())

0 commit comments

Comments
 (0)