diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 462ead70c9f93..09aeac3c6ab65 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,7 +10,7 @@ from pandas.core.algorithms import factorize from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin import pandas.core.common as com -from pandas.core.missing import interpolate_2d +from pandas.core.missing import pad from pandas.util.decorators import cache_readonly, deprecate_kwarg from pandas.core.common import (ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex, @@ -1340,8 +1340,7 @@ def fillna(self, value=None, method=None, limit=None): if method is not None: values = self.to_dense().reshape(-1, len(self)) - values = interpolate_2d( - values, method, 0, None, value).astype(self.categories.dtype)[0] + values = pad(values, method, 0, None, value).astype(self.categories.dtype)[0] values = _get_codes_for_values(values, self.categories) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6aeb4d83649ef..bd23df101ec76 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -16,7 +16,7 @@ from pandas.tseries.period import PeriodIndex from pandas.core.internals import BlockManager import pandas.core.common as com -import pandas.core.missing as mis +import pandas.core.missing as missing import pandas.core.datetools as datetools from pandas import compat from pandas.compat import map, zip, lrange, string_types, isidentifier @@ -51,7 +51,7 @@ def _single_replace(self, to_replace, method, inplace, limit): orig_dtype = self.dtype result = self if inplace else self.copy() - fill_f = mis._get_fill_func(method) + fill_f = missing._get_fill_func(method) mask = com.mask_missing(result.values, to_replace) values = fill_f(result.values, limit=limit, mask=mask) @@ -1929,7 +1929,7 @@ def reindex(self, *args, **kwargs): # construct the args axes, kwargs = self._construct_axes_from_arguments(args, kwargs) - method = 
mis._clean_reindex_fill_method(kwargs.pop('method', None)) + method = missing._clean_reindex_fill_method(kwargs.pop('method', None)) level = kwargs.pop('level', None) copy = kwargs.pop('copy', True) limit = kwargs.pop('limit', None) @@ -2042,7 +2042,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, axis_name = self._get_axis_name(axis) axis_values = self._get_axis(axis_name) - method = mis._clean_reindex_fill_method(method) + method = missing._clean_reindex_fill_method(method) new_index, indexer = axis_values.reindex(labels, method, level, limit=limit) return self._reindex_with_indexers( @@ -2774,40 +2774,28 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, # set the default here, so functions examining the signaure # can detect if something was set (e.g. in groupby) (GH9221) if axis is None: - axis = 0 + axis = self._stat_axis_name axis = self._get_axis_number(axis) - method = mis._clean_fill_method(method) + method = missing._clean_fill_method(method) from pandas import DataFrame if value is None: if method is None: raise ValueError('must specify a fill method or value') - if self._is_mixed_type and axis == 1: + if self._is_mixed_type: + if (self.ndim > 2) and (axis == 0): + raise NotImplementedError('cannot fill across axis 0 for mixed dtypes') if inplace: - raise NotImplementedError() - result = self.T.fillna(method=method, limit=limit).T - - # need to downcast here because of all of the transposes - result._data = result._data.downcast() - - return result - - # > 3d - if self.ndim > 3: - raise NotImplementedError( - 'Cannot fillna with a method for > 3dims' - ) + raise NotImplementedError('cannot fill inplace for mixed dtypes') + elif (self.ndim == 2) and (axis == 1): + result = self.T.fillna(method=method, limit=limit).T - # 3d - elif self.ndim == 3: + # need to downcast here because of all of the transposes + result._data = result._data.downcast() - # fill in 2d chunks - result = dict([(col, 
s.fillna(method=method, value=value)) - for col, s in compat.iteritems(self)]) - return self._constructor.from_dict(result).__finalize__(self) + return result - # 2d or less - method = mis._clean_fill_method(method) + method = missing._clean_fill_method(method) new_data = self._data.interpolate(method=method, axis=axis, limit=limit, @@ -3750,7 +3738,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, fill_value=None, method=None, limit=None, fill_axis=0, broadcast_axis=None): from pandas import DataFrame, Series - method = mis._clean_fill_method(method) + method = missing._clean_fill_method(method) if broadcast_axis == 1 and self.ndim != other.ndim: if isinstance(self, Series): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1b08140ebec09..acfec52162658 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -25,7 +25,7 @@ from pandas.core.categorical import Categorical, maybe_to_categorical from pandas.tseries.index import DatetimeIndex import pandas.core.common as com -import pandas.core.missing as mis +import pandas.core.missing as missing import pandas.core.convert as convert from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib @@ -853,7 +853,7 @@ def check_int_bool(self, inplace): # a fill na type method try: - m = mis._clean_fill_method(method) + m = missing._clean_fill_method(method) except: m = None @@ -871,7 +871,7 @@ def check_int_bool(self, inplace): mgr=mgr) # try an interp method try: - m = mis._clean_interp_method(method, **kwargs) + m = missing._clean_interp_method(method, **kwargs) except: m = None @@ -910,12 +910,12 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = self.values if inplace else self.values.copy() values, _, fill_value, _ = self._try_coerce_args(values, fill_value) values = self._try_operate(values) - values = mis.interpolate_2d(values, - method=method, - axis=axis, - limit=limit, - fill_value=fill_value, - 
dtype=self.dtype) + values = missing.pad(values, + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + dtype=self.dtype) values = self._try_coerce_result(values) blocks = [self.make_block(values, @@ -950,8 +950,8 @@ def func(x): # process a 1-d slice, returning it # should the axis argument be handled below in apply_along_axis? - # i.e. not an arg to mis.interpolate_1d - return mis.interpolate_1d(index, x, method=method, limit=limit, + # i.e. not an arg to missing.interpolate + return missing.interpolate(index, x, method=method, limit=limit, limit_direction=limit_direction, fill_value=fill_value, bounds_error=False, **kwargs) @@ -2358,7 +2358,7 @@ def make_block_same_class(self, values, placement, def interpolate(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, **kwargs): - values = mis.interpolate_2d( + values = missing.pad( self.values.to_dense(), method, axis, limit, fill_value) return self.make_block_same_class(values=values, placement=self.mgr_locs) @@ -3774,8 +3774,8 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None, # fill if needed if method is not None or limit is not None: - new_values = mis.interpolate_2d(new_values, method=method, - limit=limit, fill_value=fill_value) + new_values = missing.pad(new_values, method=method, + limit=limit, fill_value=fill_value) if self._block.is_sparse: make_block = self._block.make_block_same_class diff --git a/pandas/core/missing.py b/pandas/core/missing.py index f1143ad808b91..67aeea5878ef6 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -49,9 +49,9 @@ def _clean_interp_method(method, **kwargs): return method -def interpolate_1d(xvalues, yvalues, method='linear', limit=None, - limit_direction='forward', - fill_value=None, bounds_error=False, order=None, **kwargs): +def interpolate(xvalues, yvalues, method='linear', limit=None, + limit_direction='forward', + fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 
1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. @@ -219,20 +219,42 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, return new_y -def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): - """ perform an actual interpolation of values, values will be make 2-d if - needed fills inplace, returns the result +def pad(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): + """ + Perform an actual interpolation of values. 1-d values will be made 2-d temporarily. + Returns the result """ - transf = (lambda x: x) if axis == 0 else (lambda x: x.T) + ndim = values.ndim # reshape a 1 dim if needed - ndim = values.ndim - if values.ndim == 1: + if ndim == 1: if axis != 0: # pragma: no cover raise AssertionError("cannot interpolate on a ndim == 1 with " "axis != 0") values = values.reshape(tuple((1,) + values.shape)) + # recursively slice n-dimension frames (n>2) into (n-1)-dimension frames + elif ndim > 2: + slice_axis = 1 if axis == 0 else 0 + slicer = [slice(None)]*ndim + + if ndim == 3: + axis = 0 if (axis > 1) else 1 + else: + axis = axis - 1 if (axis > 0) else 0 + + for n in range(values.shape[slice_axis]): + slicer[slice_axis] = n + values[slicer] = pad(values[slicer], + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + dtype=dtype) + + return values + + transf = (lambda x: x) if axis == 0 else (lambda x: x.T) if fill_value is None: mask = None diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1f8bcf8c9879f..7b130fab92f16 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1454,20 +1454,98 @@ def test_fillna(self): assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) + # GH 11445 + # Fill forward. 
+ filled = self.panel.fillna(method='ffill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel.fillna(method='backfill', limit=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['ItemA'], + rounded['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 1. + filled = self.panel.fillna(method='backfill', axis=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 2, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel.fillna(method='backfill', axis=2) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. empty = self.panel.reindex(items=[]) filled = empty.fillna(0) assert_panel_equal(filled, empty) + # either method or value must be specified self.assertRaises(ValueError, self.panel.fillna) + + # method and value can not both be specified self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') + # can't pass list or tuple, only scalar self.assertRaises(TypeError, self.panel.fillna, [1, 2]) self.assertRaises(TypeError, self.panel.fillna, (1, 2)) # limit not implemented when only value is specified p = Panel(np.random.randn(3,4,5)) p.iloc[0:2,0:2,0:2] = np.nan - self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 11445 + + # Forward fill along axis 0, interpolating values across DataFrames. 
+ filled = self.panel.fillna(method='ffill', axis=0) + nan_indexes = self.panel.loc['ItemB', :, 'C'].index[ + self.panel.loc['ItemB', :, 'C'].apply(np.isnan)] + + # Values from ItemA are filled into ItemB. + assert_series_equal(filled.loc['ItemB', :, 'C'][nan_indexes], + self.panel.loc['ItemA', :, 'C'][nan_indexes]) + + # Backfill along axis 0. + filled = self.panel.fillna(method='backfill', axis=0) + + # The test data lacks values that can be backfilled on axis 0. + assert_panel_equal(filled, self.panel) + + # Reverse the panel and backfill along axis 0, to properly test + # backfill. + reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0])) + filled = reverse_panel.fillna(method='bfill', axis=0) + nan_indexes = reverse_panel.loc['ItemB', :, 'C'].index[ + reverse_panel.loc['ItemB', :, 'C'].isnull()] + assert_series_equal(filled.loc['ItemB', :, 'C'][nan_indexes], + reverse_panel.loc['ItemA', :, 'C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel.fillna(method='ffill', axis=0, limit=1) + a_nan = self.panel.loc['ItemA', :, 'C'].index[ + self.panel.loc['ItemA', :, 'C'].apply(np.isnan)] + b_nan = self.panel.loc['ItemB', :, 'C'].index[ + self.panel.loc['ItemB', :, 'C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemA remain unfilled in + # ItemC. 
+ self.assertTrue( + filled.loc['ItemC', :, 'C'][b_nan.difference(a_nan)].apply(np.isnan).all()) + + # fill across axis 0 is not implemented for mixed dtypes + panel = self.panel.copy() + panel['str'] = 'foo' + self.assertRaises(NotImplementedError, + lambda: panel.fillna(method='ffill', axis=0)) def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 3772d4b9c272b..6f8bcebd6e591 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -909,11 +909,106 @@ def test_sort_index(self): # assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): + # GH 11445 self.assertFalse(np.isfinite(self.panel4d.values).all()) filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - self.assertRaises(NotImplementedError, self.panel4d.fillna, method='pad') + filled = self.panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill')) + + panel4d = self.panel4d.copy() + panel4d['str'] = 'foo' + + filled = panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + panel4d['l1']['ItemA'].fillna(method='backfill')) + + # Fill forward. + filled = self.panel4d.fillna(method='ffill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel4d.fillna(method='backfill', limit=1) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel4d.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['l1']['ItemA'], + rounded['l1']['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 2.
+ filled = self.panel4d.fillna(method='backfill', axis=2) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 3, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel4d.fillna(method='backfill', axis=3) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. + empty = self.panel4d.reindex(items=[]) + filled = empty.fillna(0) + assert_panel4d_equal(filled, empty) + + # either method or value must be specified + self.assertRaises(ValueError, self.panel4d.fillna) + # method and value can not both be specified + self.assertRaises(ValueError, self.panel4d.fillna, 5, method='ffill') + + # can't pass list or tuple, only scalar + self.assertRaises(TypeError, self.panel4d.fillna, [1, 2]) + self.assertRaises(TypeError, self.panel4d.fillna, (1, 2)) + + # limit not implemented when only value is specified + p = Panel4D(np.random.randn(3,4,5,6)) + p.iloc[0:2,0:2,0:2,0:2] = np.nan + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 11445 + + # Back fill along axis 0, interpolating values across Panels + filled = self.panel4d.fillna(method='bfill', axis=0) + nan_indexes = self.panel4d.loc['l1', 'ItemB', :, 'C'].index[ + self.panel4d.loc['l1', 'ItemB', :, 'C'].apply(np.isnan)] + + # Values from ItemC are filled into ItemB. + assert_series_equal(filled.loc['l1', 'ItemB', :, 'C'][nan_indexes], + self.panel4d.loc['l1', 'ItemC', :, 'C'][nan_indexes]) + + # Forward fill along axis 0. + filled = self.panel4d.fillna(method='ffill', axis=0) + + # The test data lacks values that can be forward filled on axis 0. + assert_panel4d_equal(filled, self.panel4d) + + # Reverse the panel and forward fill along axis 0, to properly test + # forward fill.
+ reverse_panel = self.panel4d.reindex_axis(reversed(self.panel4d.axes[0])) + filled = reverse_panel.fillna(method='ffill', axis=0) + nan_indexes = reverse_panel.loc['l3', 'ItemB', :, 'C'].index[ + reverse_panel.loc['l3', 'ItemB', :, 'C'].apply(np.isnan)] + assert_series_equal(filled.loc['l3', 'ItemB', :, 'C'][nan_indexes], + reverse_panel.loc['l1', 'ItemB', :, 'C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel4d.fillna(method='bfill', axis=0, limit=1) + c_nan = self.panel4d.loc['l1', 'ItemC', :, 'C'].index[ + self.panel4d.loc['l1', 'ItemC', :, 'C'].apply(np.isnan)] + b_nan = self.panel4d.loc['l1', 'ItemB', :, 'C'].index[ + self.panel4d.loc['l1', 'ItemB', :, 'C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemC remain unfilled in + # ItemA. + self.assertTrue( + filled.loc['l1', 'ItemA', :, 'C'][b_nan.difference(c_nan)].apply(np.isnan).all()) def test_swapaxes(self): result = self.panel4d.swapaxes('labels', 'items')