From 2b5724b1535d98f7ca131298f18571e430bdd032 Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Mon, 26 Dec 2016 16:50:27 -0200 Subject: [PATCH 1/3] BUG: Reindex with columns and method --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 14 +++++++------- pandas/sparse/frame.py | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 40bd8bc4154a6..80851dbbe1843 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -285,6 +285,7 @@ Bug Fixes - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) - Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) +- Bug in ``DataFrame.reindex()`` when using ``columns`` and ``method`` (:issue:`14992`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ba1e08ecc482f..7d2486b229e69 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2710,8 +2710,8 @@ def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, columns = axes['columns'] if columns is not None: - frame = frame._reindex_columns(columns, copy, level, fill_value, - limit, tolerance) + frame = frame._reindex_columns(columns, method, copy, level, + fill_value, limit, tolerance) index = axes['index'] if index is not None: @@ -2722,17 +2722,17 @@ def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, def _reindex_index(self, new_index, method, copy, level, fill_value=NA, limit=None, tolerance=None): - new_index, indexer = self.index.reindex(new_index, method, level, - limit=limit, + new_index, indexer = self.index.reindex(new_index, method=method, + level=level, limit=limit, tolerance=tolerance) return self._reindex_with_indexers({0: [new_index, indexer]}, copy=copy, fill_value=fill_value, allow_dups=False) - def _reindex_columns(self, new_columns, copy, level, fill_value=NA, + def _reindex_columns(self, new_columns, method, copy, level, fill_value=NA, limit=None, tolerance=None): - new_columns, indexer = self.columns.reindex(new_columns, level=level, - limit=limit, + new_columns, indexer = self.columns.reindex(new_columns, method=method, + level=level, limit=limit, tolerance=tolerance) return self._reindex_with_indexers({1: [new_columns, indexer]}, copy=copy, fill_value=fill_value, diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 56020e32b9963..a529cc91b9a14 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -571,8 +571,8 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, new_series, index=index, columns=self.columns, default_fill_value=self._default_fill_value).__finalize__(self) - def _reindex_columns(self, columns, copy, level, fill_value, limit=None, - takeable=False): + def _reindex_columns(self, columns, method, copy, level, fill_value, + limit=None, takeable=False): if level is not None: raise TypeError('Reindex by level not supported for sparse') From fdbd901e929fded3724386d24255341e8422d0f4 Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Mon, 26 Dec 2016 19:46:34 -0200 Subject: [PATCH 2/3] BUG: Tests for reindex with columns and methods --- .../tests/frame/test_axis_select_reindex.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 9da1b31d259c5..b0a6d33307182 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -297,6 +297,35 @@ def test_reindex_columns(self): newFrame = self.frame.reindex(columns=[]) self.assertTrue(newFrame.empty) + # GH 14992, reindexing over columns ignored method + df = DataFrame(data=[[11, 12, 13], [21, 22, 23], [31, 32, 33]], + index=[1, 2, 4], + columns=[1, 2, 4], + dtype=float) + expected_def = DataFrame(data=[[np.nan, 11, 12, np.nan, 13, np.nan], + [np.nan, 21, 22, np.nan, 23, np.nan], + [np.nan, 31, 32, np.nan, 33, np.nan]], + index=[1, 2, 4], + columns=range(6), + dtype=float) + expected_ffill = DataFrame(data=[[np.nan, 11, 12, 12, 13, 13], + [np.nan, 21, 22, 22, 23, 23], + [np.nan, 31, 32, 32, 33, 33]], + index=[1, 2, 4], + columns=range(6), + dtype=float) + expected_bfill = DataFrame(data=[[11, 11, 12, 13, 13, np.nan], + [21, 21, 22, 23, 23, np.nan], + [31, 31, 32, 33, 33, np.nan]], + index=[1, 2, 4], + columns=range(6), + dtype=float) + assert_frame_equal(df.reindex(columns=range(6)), expected_def) + assert_frame_equal(df.reindex(columns=range(6), method='ffill'), + expected_ffill) + assert_frame_equal(df.reindex(columns=range(6), method='bfill'), + expected_bfill) + def test_reindex_axes(self): # GH 3317, reindexing by both axes loses freq of the index df = DataFrame(np.ones((3, 3)), From 9c9ad956be81193bf5178e622178fc07a2b1976c Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Tue, 27 Dec 2016 15:16:16 -0200 Subject: [PATCH 3/3] BUG: Add tests for sparse dataframe --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/sparse/frame.py | 5 +- pandas/sparse/tests/test_frame.py | 70 +++++++++++++++++++ .../tests/frame/test_axis_select_reindex.py | 55 +++++++++------ 4 files changed, 107 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 80851dbbe1843..21091d87dbfd7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -285,7 +285,7 @@ Bug Fixes - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) - Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) -- Bug in ``DataFrame.reindex()`` when using ``columns`` and ``method`` (:issue:`14992`) +- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index a529cc91b9a14..e3662ea5effd5 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -571,7 +571,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, new_series, index=index, columns=self.columns, default_fill_value=self._default_fill_value).__finalize__(self) - def _reindex_columns(self, columns, method, copy, level, fill_value, + def _reindex_columns(self, columns, method, copy, level, fill_value=None, limit=None, takeable=False): if level is not None: raise TypeError('Reindex by level not supported for sparse') @@ -582,6 +582,9 @@ def _reindex_columns(self, columns, method, copy, level, fill_value, if limit: raise NotImplementedError("'limit' argument is not supported") + if notnull(method): + raise NotImplementedError("'method' argument is not supported") + # TODO: fill value handling sdict = dict((k, v) for k, v in compat.iteritems(self) if k in columns) return self._constructor( diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index ab12099b5624d..83b6a89811ee6 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -799,6 +799,76 @@ def test_reindex_fill_value(self): exp = exp.to_sparse(self.zframe.default_fill_value) tm.assert_sp_frame_equal(result, exp) + def test_reindex_method(self): + + sparse = SparseDataFrame(data=[[11., 12., 14.], + [21., 22., 24.], + [41., 42., 44.]], + index=[1, 2, 4], + columns=[1, 2, 4], + dtype=float) + + # Over indices + + # default method + result = sparse.reindex(index=range(6)) + expected = SparseDataFrame(data=[[nan, nan, nan], + [11., 12., 14.], + [21., 22., 24.], + [nan, nan, nan], + [41., 42., 44.], + [nan, nan, nan]], + index=range(6), + columns=[1, 2, 4], + dtype=float) + tm.assert_sp_frame_equal(result, expected) + + # method='bfill' + result = sparse.reindex(index=range(6), method='bfill') + expected = SparseDataFrame(data=[[11., 12., 14.], + [11., 12., 14.], + [21., 22., 24.], + [41., 42., 44.], + [41., 42., 44.], + [nan, nan, nan]], + index=range(6), + columns=[1, 2, 4], + dtype=float) + tm.assert_sp_frame_equal(result, expected) + + # method='ffill' + result = sparse.reindex(index=range(6), method='ffill') + expected = SparseDataFrame(data=[[nan, nan, nan], + [11., 12., 14.], + [21., 22., 24.], + [21., 22., 24.], + [41., 42., 44.], + [41., 42., 44.]], + index=range(6), + columns=[1, 2, 4], + dtype=float) + tm.assert_sp_frame_equal(result, expected) + + # Over columns + + # default method + result = sparse.reindex(columns=range(6)) + expected = SparseDataFrame(data=[[nan, 11., 12., nan, 14., nan], + [nan, 21., 22., nan, 24., nan], + [nan, 41., 42., nan, 44., nan]], + index=[1, 2, 4], + columns=range(6), + dtype=float) + tm.assert_sp_frame_equal(result, expected) + + # method='bfill' + with tm.assertRaises(NotImplementedError): + sparse.reindex(columns=range(6), method='bfill') + + # method='ffill' + with tm.assertRaises(NotImplementedError): + sparse.reindex(columns=range(6), method='ffill') + def test_take(self): result = self.frame.take([1, 0, 2], axis=1) expected = self.frame.reindex(columns=['B', 'A', 'C']) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index b0a6d33307182..ecce17f96a672 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -297,34 +297,43 @@ def test_reindex_columns(self): newFrame = self.frame.reindex(columns=[]) self.assertTrue(newFrame.empty) + def test_reindex_columns_method(self): + # GH 14992, reindexing over columns ignored method df = DataFrame(data=[[11, 12, 13], [21, 22, 23], [31, 32, 33]], index=[1, 2, 4], columns=[1, 2, 4], dtype=float) - expected_def = DataFrame(data=[[np.nan, 11, 12, np.nan, 13, np.nan], - [np.nan, 21, 22, np.nan, 23, np.nan], - [np.nan, 31, 32, np.nan, 33, np.nan]], - index=[1, 2, 4], - columns=range(6), - dtype=float) - expected_ffill = DataFrame(data=[[np.nan, 11, 12, 12, 13, 13], - [np.nan, 21, 22, 22, 23, 23], - [np.nan, 31, 32, 32, 33, 33]], - index=[1, 2, 4], - columns=range(6), - dtype=float) - expected_bfill = DataFrame(data=[[11, 11, 12, 13, 13, np.nan], - [21, 21, 22, 23, 23, np.nan], - [31, 31, 32, 33, 33, np.nan]], - index=[1, 2, 4], - columns=range(6), - dtype=float) - assert_frame_equal(df.reindex(columns=range(6)), expected_def) - assert_frame_equal(df.reindex(columns=range(6), method='ffill'), - expected_ffill) - assert_frame_equal(df.reindex(columns=range(6), method='bfill'), - expected_bfill) + + # default method + result = df.reindex(columns=range(6)) + expected = DataFrame(data=[[np.nan, 11, 12, np.nan, 13, np.nan], + [np.nan, 21, 22, np.nan, 23, np.nan], + [np.nan, 31, 32, np.nan, 33, np.nan]], + index=[1, 2, 4], + columns=range(6), + dtype=float) + assert_frame_equal(result, expected) + + # method='ffill' + result = df.reindex(columns=range(6), method='ffill') + expected = DataFrame(data=[[np.nan, 11, 12, 12, 13, 13], + [np.nan, 21, 22, 22, 23, 23], + [np.nan, 31, 32, 32, 33, 33]], + index=[1, 2, 4], + columns=range(6), + dtype=float) + assert_frame_equal(result, expected) + + # method='bfill' + result = df.reindex(columns=range(6), method='bfill') + expected = DataFrame(data=[[11, 11, 12, 13, 13, np.nan], + [21, 21, 22, 23, 23, np.nan], + [31, 31, 32, 33, 33, np.nan]], + index=[1, 2, 4], + columns=range(6), + dtype=float) + assert_frame_equal(result, expected) def test_reindex_axes(self): # GH 3317, reindexing by both axes loses freq of the index