From c8c2118b20b0308bd5ea210ad67cb06e50206387 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 30 Mar 2014 00:29:29 +0900 Subject: [PATCH] ENH: rename function now has errors keyword --- doc/source/release.rst | 2 ++ doc/source/v0.14.0.txt | 2 ++ pandas/core/generic.py | 38 +++++++++++++++++++++++++++++++++--- pandas/tests/test_frame.py | 40 +++++++++++++++++++++++++++++++++++++- 4 files changed, 78 insertions(+), 4 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 80a833848f8fa..d8549886578d9 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -154,6 +154,8 @@ API Changes - ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) - ``Panel.shift`` now uses ``NDFrame.shift``. It no longer drops the ``nan`` data and retains its original shape. (:issue:`4867`) +- The ``rename`` function now accepts an ``errors`` keyword to suppress errors raised by a passed function, or to raise ``ValueError`` when any label in a passed dict is not found in the target axis. + Deprecations ~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 23ab8f10116c1..c32e10c358b62 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -199,6 +199,8 @@ API changes - ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) - ``Panel.shift`` now uses ``NDFrame.shift``. It no longer drops the ``nan`` data and retains its original shape. (:issue:`4867`) +- The ``rename`` function now accepts an ``errors`` keyword to suppress errors raised by a passed function, or to raise ``ValueError`` when any label in a passed dict is not found in the target axis. From this version, passing a dict with labels that are not contained in the axis results in a ``FutureWarning``, and will raise ``ValueError`` in a future version. 
+ MultiIndexing Using Slicers ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cec6ce2cabcdd..78303198fb9a3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -524,6 +524,10 @@ def swaplevel(self, i, j, axis=0): inplace : boolean, default False Whether to return a new %(klass)s. If True then value of copy is ignored. + errors : {'ignore', 'raise'}, default 'raise' + If 'raise', ValueError is raised when any label in the dict doesn't exist in the target axis. + Also, errors raised by the passed function are not suppressed. + If 'ignore', suppress errors and rename only the labels which don't trigger errors. Returns ------- @@ -538,19 +542,46 @@ def rename(self, *args, **kwargs): copy = kwargs.get('copy', True) inplace = kwargs.get('inplace', False) + # default should be 'raise' in future version + errors = kwargs.get('errors', None) + # errors = kwargs.get('errors', 'raise') + if (com._count_not_none(*axes.values()) == 0): raise TypeError('must pass an index to rename') # renamer function if passed a dict - def _get_rename_function(mapper): + def _get_rename_function(mapper, axis): if isinstance(mapper, (dict, ABCSeries)): + if errors != 'ignore': + # cast the keys view to a list first (needed on python 3) + labels = np.array(list(mapper.keys())) + axis = self._get_axis(axis) + indexer = axis.get_indexer(labels) + mask = indexer == -1 + if mask.any(): + msg = 'labels %s not contained in axis' % labels[mask] + + if errors is None: + # should be removed in future version + warnings.warn("%s will results in ValueError in the future." 
+ "Use 'errors' keyword to suppress/force error" % msg, + FutureWarning) + else: + raise ValueError(msg) + def f(x): if x in mapper: return mapper[x] else: return x else: - f = mapper + def f(x): + try: + return mapper(x) + except Exception: + if errors != 'ignore': + raise + return x return f @@ -562,7 +593,8 @@ def f(x): v = axes.get(self._AXIS_NAMES[axis]) if v is None: continue - f = _get_rename_function(v) + + f = _get_rename_function(v, axis) baxis = self._get_block_manager_axis(axis) result._data = result._data.rename(f, axis=baxis, copy=copy) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1bbcba0e4caad..b4f8123979563 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -9031,8 +9031,23 @@ def test_rename(self): index = MultiIndex.from_tuples(tuples_index, names=['foo', 'bar']) columns = MultiIndex.from_tuples(tuples_columns, names=['fizz', 'buzz']) renamer = DataFrame([(0,0),(1,1)], index=index, columns=columns) + + with tm.assert_produces_warning(): + # should raise ValueError in future version + renamed = renamer.rename(index={'foo1': 'foo3', 'bar2': 'bar3'}, + columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}) + new_index = MultiIndex.from_tuples([('foo3', 'bar1'), ('foo2', 'bar3')]) + new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'), ('fizz2', 'buzz3')]) + self.assert_numpy_array_equal(renamed.index, new_index) + self.assert_numpy_array_equal(renamed.columns, new_columns) + self.assertEquals(renamed.index.names, renamer.index.names) + self.assertEquals(renamed.columns.names, renamer.columns.names) + + self.assertRaises(ValueError, renamer.rename, index={'foo1': 'foo3', 'bar2': 'bar3'}, + columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, errors='raise') + renamed = renamer.rename(index={'foo1': 'foo3', 'bar2': 'bar3'}, - columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}) + columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, errors='ignore') new_index = MultiIndex.from_tuples([('foo3', 'bar1'), ('foo2', 'bar3')]) 
new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'), ('fizz2', 'buzz3')]) self.assert_numpy_array_equal(renamed.index, new_index) @@ -9040,6 +9055,29 @@ def test_rename(self): self.assertEquals(renamed.index.names, renamer.index.names) self.assertEquals(renamed.columns.names, renamer.columns.names) + # error handling + data = {1: {'A': 0, 'B': 1}, '2': {'C':1, 'D': 2}} + df = DataFrame(data) + + # errors = default + with tm.assert_produces_warning(): + # should raise ValueError in future version + renamed = df.rename(columns={'1': 'One', '2': 'Two'}) + self.assertEqual(renamed.columns.tolist(), [1, 'Two']) + self.assertRaises(TypeError, df.rename, columns=lambda x: x + 1) + + # errors = raise + self.assertRaises(ValueError, df.rename, + columns={'1': 'One', '2': 'Two'}, errors='raise') + self.assertRaises(TypeError, df.rename, columns=lambda x: x + 1, errors='raise') + + # errors = ignore + renamed = df.rename(columns={'1': 'One', '2': 'Two'}, errors='ignore') + self.assertEqual(renamed.columns.tolist(), [1, 'Two']) + + renamed = df.rename(columns=lambda x: x + 1, errors='ignore') + self.assertEqual(renamed.columns.tolist(), [2, '2']) + def test_rename_nocopy(self): renamed = self.frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1.