From 8ac61c535213ccb29c50b220eb6ea370d950dc37 Mon Sep 17 00:00:00 2001 From: prossahl Date: Thu, 22 Aug 2013 16:47:56 +0100 Subject: [PATCH 1/2] BUG/ER: Stricter testing of 'monotonicity' when reindexing with ffill or bfill (GH4483). --- doc/source/basics.rst | 6 +++--- doc/source/missing_data.rst | 3 +++ doc/source/release.rst | 2 ++ pandas/core/index.py | 8 ++++---- pandas/tests/test_frame.py | 23 +++++++++++++++++++++++ pandas/tseries/tests/test_timeseries.py | 2 +- 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index a0818831fb988..9c0bdd983707c 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -724,6 +724,8 @@ We illustrate these fill methods on a simple TimeSeries: ts2.reindex(ts.index, method='ffill') ts2.reindex(ts.index, method='bfill') +Note these methods require that the indexes are in **increasing order**. + Note the same result could have been achieved using :ref:`fillna `: @@ -731,9 +733,7 @@ Note the same result could have been achieved using :ref:`fillna ts2.reindex(ts.index).fillna(method='ffill') -Note these methods generally assume that the indexes are **sorted**. They may -be modified in the future to be a bit more flexible but as time series data is -ordered most of the time anyway, this has not been a major priority. +Note that this method does not check the order of the index. .. _basics.drop: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 0c8efb4e905ec..c6da92f4d6cc2 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -205,6 +205,9 @@ To remind you, these are the available filling methods: With time series data, using pad/ffill is extremely common so that the "last known value" is available at every time point. +The ``ffill()`` function is equivalent to ``fillna(method='ffill')`` +and ``bfill()`` is equivalent to ``fillna(method='bfill')``. + .. 
_missing_data.dropna: Dropping axis labels with missing data: dropna diff --git a/doc/source/release.rst b/doc/source/release.rst index 745876e8c448e..da787d5b423f9 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -62,6 +62,8 @@ pandas 0.13 **API Changes** + - ``DataFrame.reindex()`` with forward/backward filling now raises ``ValueError`` + if either index is not monotonic (:issue:`4483`, :issue:`4484`). - ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to @jtratner. As a result, pandas now uses iterators more extensively. This also led to the introduction of substantive parts of the Benjamin diff --git a/pandas/core/index.py b/pandas/core/index.py index 22bd7f318a237..6db82b15435ec 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -905,12 +905,12 @@ def get_indexer(self, target, method=None, limit=None): ' valued Index objects') if method == 'pad': - if not self.is_monotonic: - raise AssertionError('Must be monotonic for forward fill') + if not self.is_monotonic or not target.is_monotonic: + raise ValueError('Must be monotonic for forward fill') indexer = self._engine.get_pad_indexer(target.values, limit) elif method == 'backfill': - if not self.is_monotonic: - raise AssertionError('Must be monotonic for backward fill') + if not self.is_monotonic or not target.is_monotonic: + raise ValueError('Must be monotonic for backward fill') indexer = self._engine.get_backfill_indexer(target.values, limit) elif method is None: indexer = self._engine.get_indexer(target.values) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index c8f87a19a5f34..fa9d795a72808 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1641,6 +1641,29 @@ def test_nested_exception(self): except Exception as e: self.assertNotEqual(type(e), UnboundLocalError) + def test_reverse_reindex_ffill_raises(self): + dr = pd.date_range('2013-08-01', periods=6, freq='B') + data = np.random.randn(6,1) + df = 
pd.DataFrame(data, index=dr, columns=list('A')) + df['A'][3] = np.nan + df_rev = pd.DataFrame(data, index=dr[::-1], columns=list('A')) + # Reverse index is not 'monotonic' + self.assertRaises(ValueError, df_rev.reindex, df.index, method='pad') + self.assertRaises(ValueError, df_rev.reindex, df.index, method='ffill') + self.assertRaises(ValueError, df_rev.reindex, df.index, method='bfill') + + def test_reversed_reindex_ffill_raises(self): + dr = pd.date_range('2013-08-01', periods=6, freq='B') + data = np.random.randn(6,1) + df = pd.DataFrame(data, index=dr, columns=list('A')) + df['A'][3] = np.nan + df = pd.DataFrame(data, index=dr, columns=list('A')) + # Reversed reindex is not 'monotonic' + self.assertRaises(ValueError, df.reindex, dr[::-1], method='pad') + self.assertRaises(ValueError, df.reindex, dr[::-1], method='ffill') + self.assertRaises(ValueError, df.reindex, dr[::-1], method='bfill') + + _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 172172f667eca..f0b1f72979bda 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -546,7 +546,7 @@ def test_pad_require_monotonicity(self): rng2 = rng[::2][::-1] - self.assertRaises(AssertionError, rng2.get_indexer, rng, + self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') def test_frame_ctor_datetime64_column(self): From 3bc6deaa670e1c5839a2344e4212a23f0af85b5f Mon Sep 17 00:00:00 2001 From: prossahl Date: Tue, 27 Aug 2013 14:24:39 +0100 Subject: [PATCH 2/2] Minor documentation change. 
--- doc/source/basics.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 9c0bdd983707c..a7daf48b97b5e 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -733,7 +733,8 @@ Note the same result could have been achieved using :ref:`fillna ts2.reindex(ts.index).fillna(method='ffill') -Note that this method does not check the order of the index. +Note that ``reindex`` with a fill ``method`` will raise a ``ValueError`` if either +index is not monotonic. ``fillna`` does not make any checks on the order of the index. .. _basics.drop: