diff --git a/doc/source/release.rst b/doc/source/release.rst index 9a34cdbdfb5a8..08a9b15dc9f58 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -152,6 +152,8 @@ pandas 0.13 passed a string (:issue:`4763`). Pass a ``list`` of one element (containing the string) instead. - Remove undocumented/unused ``kind`` keyword argument from ``read_excel``, and ``ExcelFile``. (:issue:`4713`, :issue:`4712`) + - The ``method`` argument of ``NDFrame.replace()`` is valid again, so that a + list can be passed to ``to_replace`` (:issue:`4743`). **Internal Refactoring** diff --git a/pandas/core/common.py b/pandas/core/common.py index 6fc006eae74fe..8b9ba4d5eea16 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1047,6 +1047,7 @@ def wrapper(arr, mask, limit=None): np.int64) + def pad_1d(values, limit=None, mask=None): dtype = values.dtype.name @@ -1188,6 +1189,14 @@ def _consensus_name_attr(objs): return None return name + +_fill_methods = {'pad': pad_1d, 'backfill': backfill_1d} + +def _get_fill_func(method): + method = _clean_fill_method(method) + return _fill_methods[method] + + #---------------------------------------------------------------------- # Lots of little utilities diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7f5b9b7f75545..3bdfd98127bb7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5,6 +5,7 @@ import numpy as np import pandas.lib as lib +import pandas as pd from pandas.core.base import PandasObject from pandas.core.index import Index, MultiIndex, _ensure_index import pandas.core.indexing as indexing @@ -19,6 +20,33 @@ _infer_dtype_from_scalar, _maybe_promote, ABCSeries) + + +def is_dictlike(x): + return isinstance(x, (dict, com.ABCSeries)) + + +def _single_replace(self, to_replace, method, inplace, limit): + orig_dtype = self.dtype + result = self if inplace else self.copy() + fill_f = com._get_fill_func(method) + + mask = com.mask_missing(result.values, to_replace) + values = 
fill_f(result.values, limit=limit, mask=mask) + + if values.dtype == orig_dtype and inplace: + return + + result = pd.Series(values, index=self.index, name=self.name, + dtype=self.dtype) + + if inplace: + self._data = result._data + return + + return result + + class NDFrame(PandasObject): """ @@ -1581,7 +1609,7 @@ def bfill(self, axis=0, inplace=False, limit=None, downcast=None): limit=limit, downcast=downcast) def replace(self, to_replace=None, value=None, inplace=False, limit=None, - regex=False, method=None, axis=None): + regex=False, method='pad', axis=None): """ Replace values given in 'to_replace' with 'value'. @@ -1643,14 +1671,19 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, string. Otherwise, `to_replace` must be ``None`` because this parameter will be interpreted as a regular expression or a list, dict, or array of regular expressions. + method : string, optional, {'pad', 'ffill', 'bfill'} + The method to use for replacement when ``to_replace`` is a + ``list``. 
See also -------- - reindex, asfreq, fillna + NDFrame.reindex + NDFrame.asfreq + NDFrame.fillna Returns ------- - filled : DataFrame + filled : NDFrame Raises ------ @@ -1681,11 +1714,6 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, if not com.is_bool(regex) and to_replace is not None: raise AssertionError("'to_replace' must be 'None' if 'regex' is " "not a bool") - if method is not None: - from warnings import warn - warn('the "method" argument is deprecated and will be removed in' - 'v0.13; this argument has no effect') - if axis is not None: from warnings import warn warn('the "axis" argument is deprecated and will be removed in' @@ -1693,14 +1721,16 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, self._consolidate_inplace() - def is_dictlike(x): - return isinstance(x, (dict, com.ABCSeries)) - if value is None: + if isinstance(to_replace, list): + return _single_replace(self, to_replace, method, inplace, + limit) + if not is_dictlike(to_replace): if not is_dictlike(regex): raise TypeError('If "to_replace" and "value" are both None' - ' then regex must be a mapping') + ' and "to_replace" is not a list, then ' + 'regex must be a mapping') to_replace = regex regex = True diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 556973acdcb95..4d86e8ae4a25b 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4608,6 +4608,63 @@ def test_replace(self): result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) assert_series_equal(result, Series([4, 3, 2, 1, 0])) + def test_replace_with_single_list(self): + ser = Series([0, 1, 2, 3, 4]) + result = ser.replace([1,2,3]) + assert_series_equal(result, Series([0,0,0,0,4])) + + s = ser.copy() + s.replace([1,2,3],inplace=True) + assert_series_equal(s, Series([0,0,0,0,4])) + + # make sure things don't get corrupted when fillna call fails + s = ser.copy() + with tm.assertRaises(ValueError): + 
s.replace([1,2,3],inplace=True,method='crash_cymbal') + assert_series_equal(s, ser) + + def test_replace_mixed_types(self): + s = Series(np.arange(5)) + + def check_replace(to_rep, val, expected): + sc = s.copy() + r = s.replace(to_rep, val) + sc.replace(to_rep, val, inplace=True) + assert_series_equal(expected, r) + assert_series_equal(expected, sc) + + # should NOT upcast to float + e = Series([0,1,2,3,4]) + tr, v = [3], [3.0] + check_replace(tr, v, e) + + # MUST upcast to float + e = Series([0,1,2,3.5,4]) + tr, v = [3], [3.5] + check_replace(tr, v, e) + + # casts to object + e = Series([0,1,2,3.5,'a']) + tr, v = [3,4], [3.5,'a'] + check_replace(tr, v, e) + + # again casts to object + e = Series([0,1,2,3.5,Timestamp('20130101')]) + tr, v = [3,4],[3.5,Timestamp('20130101')] + check_replace(tr, v, e) + + # casts to float + e = Series([0,1,2,3.5,1]) + tr, v = [3,4],[3.5,True] + check_replace(tr, v, e) + + # test an object with dates + floats + integers + strings + dr = date_range('1/1/2001', '1/10/2001', + freq='D').to_series().reset_index(drop=True) + r = dr.astype(object).replace([dr[0],dr[1],dr[2]], [1.0,2,'a']) + assert_series_equal(r, Series([1.0,2,'a'] + + dr[3:].tolist(),dtype=object)) + def test_asfreq(self): ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime(2009, 11, 30),