From 49cae216095bed30501c8bdbcbb0886c0a5f957b Mon Sep 17 00:00:00 2001 From: Aaron Staple Date: Fri, 26 Sep 2014 19:35:32 -0700 Subject: [PATCH] BUG: Panel.fillna with method='ffill' ignores the axis parameter (GH8251) --- pandas/core/generic.py | 97 +++++++++++++++++++++++++++----------- pandas/tests/test_frame.py | 9 +++- pandas/tests/test_panel.py | 77 ++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+), 28 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f9b543f0fa7d..fad15dc5b53a0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2220,7 +2220,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, #---------------------------------------------------------------------- # Filling NA's - def fillna(self, value=None, method=None, axis=0, inplace=False, + def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): """ Fill NA/NaN values using the specified method @@ -2236,9 +2236,11 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, values specifying which value to use for each index (for a Series) or column (for a DataFrame). (values not in the dict/Series/DataFrame will not be filled). This value cannot be a list. - axis : {0, 1}, default 0 + axis : {0, 1, 2} + Fill along this axis. For a DataFrame: * 0: fill column-by-column * 1: fill row-by-row + Default: fill column-by-column (0 for DataFrame, 1 for Panel) inplace : boolean, default False If True, fill in place. Note: this will modify any other views on this object, (e.g. a no-copy slice for a column in a @@ -2263,6 +2265,9 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, 'you passed a "{0}"'.format(type(value).__name__)) self._consolidate_inplace() + if axis is None: + axis = max(0, self.ndim - 2) + axis = self._get_axis_number(axis) method = com._clean_fill_method(method) @@ -2270,38 +2275,66 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, if value is None: if method is None: raise ValueError('must specify a fill method or value') - if self._is_mixed_type and axis == 1: + + # 2d or less + if self.ndim <= 2: + if self._is_mixed_type and axis == 1: + if inplace: + raise NotImplementedError( + 'Cannot fill mixed type on axis 1 in place') + if downcast: + raise NotImplementedError( + 'Cannot fill mixed type on axis 1 with downcast') + + result = self.T.fillna(method=method, limit=limit).T + + # need to downcast here because of all of the transposes + result._data = result._data.downcast() + + return result + + method = com._clean_fill_method(method) + new_data = self._data.interpolate(method=method, + axis=axis, + limit=limit, + inplace=inplace, + coerce=True, + downcast=downcast) + + # 3d + elif self.ndim == 3: if inplace: - raise NotImplementedError() - result = self.T.fillna(method=method, limit=limit).T + raise NotImplementedError('Cannot fill Panel in place') - # need to downcast here because of all of the transposes - result._data = result._data.downcast() + if axis == 0: + if downcast: + raise NotImplementedError( + 'Cannot fill Panel on axis 0 with downcast') - return result + swapped = self.swapaxes(0, 1, copy=False) + filled = swapped.fillna(method=method, axis=1, limit=limit) + result = filled.swapaxes(0, 1, copy=False) + + # need to downcast here because of all of the transposes + result._data = result._data.downcast() + + return result + + else: + # fill in 2d chunks + result = dict([(col, s.fillna(method=method, + axis=axis - 1, + limit=limit, + downcast=downcast)) + for col, s in compat.iteritems(self)]) + return self._constructor.from_dict(result).__finalize__(self) # > 3d - if self.ndim > 3: + elif self.ndim > 3: raise NotImplementedError( 'Cannot fillna with a method for > 3dims' ) - # 3d - elif self.ndim == 3: - - # fill in 2d chunks - result = dict([(col, s.fillna(method=method, value=value)) - for col, s in compat.iteritems(self)]) - return self._constructor.from_dict(result).__finalize__(self) - - # 2d or less - method = com._clean_fill_method(method) - new_data = self._data.interpolate(method=method, - axis=axis, - limit=limit, - inplace=inplace, - coerce=True, - downcast=downcast) else: if method is not None: raise ValueError('cannot specify both a fill method and value') @@ -2324,11 +2357,19 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, downcast=downcast) elif isinstance(value, (dict, com.ABCSeries)): + if self.ndim >= 3: + raise NotImplementedError('Cannot fillna with a dict/Series ' + 'for >= 3 dims') + if axis == 1: raise NotImplementedError('Currently only can fill ' 'with dict/Series column ' 'by column') + if downcast: + raise NotImplementedError( + 'Cannot downcast with dict/Series') + result = self if inplace else self.copy() for k, v in compat.iteritems(value): if k not in result: @@ -2336,11 +2377,13 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, obj = result[k] obj.fillna(v, limit=limit, inplace=True) return result + elif not com.is_list_like(value): new_data = self._data.fillna(value=value, limit=limit, inplace=inplace, downcast=downcast) + elif isinstance(value, DataFrame) and self.ndim == 2: new_data = self.where(self.notnull(), value) else: @@ -2351,12 +2394,12 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, else: return self._constructor(new_data).__finalize__(self) - def ffill(self, axis=0, inplace=False, limit=None, downcast=None): + def ffill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='ffill')" return self.fillna(method='ffill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) - def bfill(self, axis=0, inplace=False, limit=None, downcast=None): + def bfill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='bfill')" return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 80d81f4e283f3..ca7b997aaced3 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7628,9 +7628,11 @@ def test_fillna_dict_series(self): expected = df.fillna(df.max().to_dict()) assert_frame_equal(result, expected) - # disable this for now + # disable these for now with assertRaisesRegexp(NotImplementedError, 'column by column'): df.fillna(df.max(1), axis=1) + with assertRaisesRegexp(NotImplementedError, 'downcast'): + df.fillna(df.max(1), downcast='infer') def test_fillna_dataframe(self): # GH 8377 @@ -7668,6 +7670,11 @@ def test_fillna_columns(self): expected = df.astype(float).fillna(method='ffill', axis=1) assert_frame_equal(result, expected) + # disable these for now + with assertRaisesRegexp(NotImplementedError, 'axis 1.*in place'): + df.fillna(method='ffill', axis=1, inplace=True) + with assertRaisesRegexp(NotImplementedError, 'axis 1.*downcast'): + df.fillna(method='ffill', axis=1, downcast='infer') def test_fillna_invalid_method(self): with assertRaisesRegexp(ValueError, 'ffil'): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 736cdf312b361..4a3e02aa75867 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1336,9 +1336,12 @@ def test_sort_index(self): assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): + # Fill with a value. filled = self.panel.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) + # If no axis is specified, fill along axis 1, equivalent to axis 0 of + # each DataFrame. filled = self.panel.fillna(method='backfill') assert_frame_equal(filled['ItemA'], self.panel['ItemA'].fillna(method='backfill')) @@ -1350,10 +1353,72 @@ def test_fillna(self): assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) + # Fill forward. + filled = self.panel.fillna(method='ffill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel.fillna(method='backfill', limit=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['ItemA'], + rounded['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 1. + filled = self.panel.fillna(method='backfill', axis=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 2, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel.fillna(method='backfill', axis=2) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. empty = self.panel.reindex(items=[]) filled = empty.fillna(0) assert_panel_equal(filled, empty) + def test_fillna_axis_0(self): + # Forward fill along axis 0, interpolating values across DataFrames. + filled = self.panel.fillna(method='ffill', axis=0) + nan_indexes = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + # Values from ItemA are filled into ItemB. + assert_series_equal(filled['ItemB']['C'][nan_indexes], + self.panel['ItemA']['C'][nan_indexes]) + + # Backfill along axis 0. + filled = self.panel.fillna(method='backfill', axis=0) + # The test data lacks values that can be backfilled on axis 0. + assert_panel_equal(filled, self.panel) + # Reverse the panel and backfill along axis 0, to properly test + # backfill. + reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0])) + filled = reverse_panel.fillna(method='bfill', axis=0) + nan_indexes = reverse_panel['ItemB']['C'].index[ + reverse_panel['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['ItemB']['C'][nan_indexes], + reverse_panel['ItemA']['C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel.fillna(method='ffill', axis=0, limit=1) + a_nan = self.panel['ItemA']['C'].index[ + self.panel['ItemA']['C'].apply(np.isnan)] + b_nan = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + # Cells that are nan in ItemB but not in ItemA remain unfilled in + # ItemC. + self.assertTrue( + filled['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all()) + + def test_fillna_error(self): self.assertRaises(ValueError, self.panel.fillna) self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') @@ -1365,6 +1430,18 @@ def test_fillna(self): p.iloc[0:2,0:2,0:2] = np.nan self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) + # Method fill is not implemented with inplace=True. + self.assertRaises(NotImplementedError, + lambda: self.panel.fillna(method='bfill', inplace=True)) + + # Method fill is not implemented with downcast on axis=0. + self.assertRaises(NotImplementedError, + lambda: self.panel.fillna(method='bfill', axis=0, downcast='infer')) + + # Value fill is not implemented with dict. + self.assertRaises(NotImplementedError, + lambda: self.panel.fillna(value={'a': 'b'})) + def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), self.panel.fillna(method='ffill'))