diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 28129287d51af..13389b603ed6c 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -25,6 +25,7 @@ Enhancements objects for the ``filepath_or_buffer`` argument. (:issue:`11033`) - ``DataFrame`` now uses the fields of a ``namedtuple`` as columns, if columns are not supplied (:issue:`11181`) - Improve the error message displayed in :func:`pandas.io.gbq.to_gbq` when the DataFrame does not match the schema of the destination table (:issue:`11359`) +- Akima 1D interpolation is now supported (:issue:`7588`) .. _whatsnew_0171.api: @@ -118,3 +119,7 @@ Bug Fixes - Bug in ``to_excel`` with openpyxl 2.2+ and merging (:issue:`11408`) - Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`) + +- Bug in ``Panel.fillna()`` does not fill across axis 0 (:issue:`8251`) + +- Bug in ``Panel.fillna()`` loses index names (:issue:`3570`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index e304684036766..c6b33557951d5 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,7 +10,7 @@ from pandas.core.algorithms import factorize from pandas.core.base import PandasObject, PandasDelegate import pandas.core.common as com -from pandas.core.missing import interpolate_2d +from pandas.core.missing import pad from pandas.util.decorators import cache_readonly, deprecate_kwarg from pandas.core.common import (ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex, @@ -1313,8 +1313,7 @@ def fillna(self, value=None, method=None, limit=None): if method is not None: values = self.to_dense().reshape(-1, len(self)) - values = interpolate_2d( - values, method, 0, None, value).astype(self.categories.dtype)[0] + values = pad(values, method, 0, None, value).astype(self.categories.dtype)[0] values = _get_codes_for_values(values, self.categories) else: 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f46296bb6f70c..34e9047a9fdd2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2774,7 +2774,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, # set the default here, so functions examining the signaure # can detect if something was set (e.g. in groupby) (GH9221) if axis is None: - axis = 0 + axis = self._stat_axis_name axis = self._get_axis_number(axis) method = mis._clean_fill_method(method) @@ -2782,31 +2782,19 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if value is None: if method is None: raise ValueError('must specify a fill method or value') - if self._is_mixed_type and axis == 1: + if self._is_mixed_type: + if (self.ndim > 2) and (axis == 0): + raise NotImplementedError('cannot fill across axis 0 for mixed dtypes') if inplace: - raise NotImplementedError() - result = self.T.fillna(method=method, limit=limit).T - - # need to downcast here because of all of the transposes - result._data = result._data.downcast() - - return result - - # > 3d - if self.ndim > 3: - raise NotImplementedError( - 'Cannot fillna with a method for > 3dims' - ) + raise NotImplementedError('cannot fill inplace for mixed dtypes') + elif (self.ndim == 2) and (axis == 1): + result = self.T.fillna(method=method, limit=limit).T - # 3d - elif self.ndim == 3: + # need to downcast here because of all of the transposes + result._data = result._data.downcast() - # fill in 2d chunks - result = dict([(col, s.fillna(method=method, value=value)) - for col, s in compat.iteritems(self)]) - return self._constructor.from_dict(result).__finalize__(self) + return result - # 2d or less method = mis._clean_fill_method(method) new_data = self._data.interpolate(method=method, axis=axis, diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1b08140ebec09..08048d684e407 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -910,12 +910,12 @@ 
def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = self.values if inplace else self.values.copy() values, _, fill_value, _ = self._try_coerce_args(values, fill_value) values = self._try_operate(values) - values = mis.interpolate_2d(values, - method=method, - axis=axis, - limit=limit, - fill_value=fill_value, - dtype=self.dtype) + values = mis.pad(values, + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + dtype=self.dtype) values = self._try_coerce_result(values) blocks = [self.make_block(values, @@ -950,8 +950,8 @@ def func(x): # process a 1-d slice, returning it # should the axis argument be handled below in apply_along_axis? - # i.e. not an arg to mis.interpolate_1d - return mis.interpolate_1d(index, x, method=method, limit=limit, + # i.e. not an arg to mis.interpolate + return mis.interpolate(index, x, method=method, limit=limit, limit_direction=limit_direction, fill_value=fill_value, bounds_error=False, **kwargs) @@ -2358,7 +2358,7 @@ def make_block_same_class(self, values, placement, def interpolate(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, **kwargs): - values = mis.interpolate_2d( + values = mis.pad( self.values.to_dense(), method, axis, limit, fill_value) return self.make_block_same_class(values=values, placement=self.mgr_locs) @@ -3774,7 +3774,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None, # fill if needed if method is not None or limit is not None: - new_values = mis.interpolate_2d(new_values, method=method, + new_values = mis.pad(new_values, method=method, limit=limit, fill_value=fill_value) if self._block.is_sparse: diff --git a/pandas/core/missing.py b/pandas/core/missing.py index f1143ad808b91..184faf1f0e7c3 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -39,7 +39,7 @@ def _clean_interp_method(method, **kwargs): valid = ['linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 
'polynomial', 'krogh', 'piecewise_polynomial', - 'pchip', 'spline'] + 'pchip', 'spline', 'akima'] if method in ('spline', 'polynomial') and order is None: raise ValueError("You must specify the order of the spline or " "polynomial.") @@ -49,9 +49,9 @@ def _clean_interp_method(method, **kwargs): return method -def interpolate_1d(xvalues, yvalues, method='linear', limit=None, - limit_direction='forward', - fill_value=None, bounds_error=False, order=None, **kwargs): +def interpolate(xvalues, yvalues, method='linear', limit=None, + limit_direction='forward', + fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. @@ -144,7 +144,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', - 'piecewise_polynomial', 'pchip'] + 'piecewise_polynomial', 'pchip', 'akima'] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 @@ -156,6 +156,8 @@ def _interp_limit(invalid, fw_limit, bw_limit): bounds_error=bounds_error, order=order, **kwargs) result[violate_limit] = np.nan return result + else: + raise ValueError('interpolation method not found') def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, @@ -214,20 +216,51 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, y = y.copy() if not new_x.flags.writeable: new_x = new_x.copy() - method = alt_methods[method] - new_y = method(x, y, new_x, **kwargs) + if method == 'akima': + try: + interpolator = interpolate.Akima1DInterpolator(x, y) + except AttributeError: + raise ImportError("Your version of scipy does not support " + "Akima interpolation" ) + new_y = interpolator(new_x) + else: + method = alt_methods[method] + new_y = method(x, y, new_x, **kwargs) return new_y -def interpolate_2d(values, method='pad', axis=0, 
limit=None, fill_value=None, dtype=None): - """ perform an actual interpolation of values, values will be make 2-d if - needed fills inplace, returns the result +def pad(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): + """ + Perform an actual interpolation of values. 1-d values will be made 2-d temporarily. + Returns the result """ + ndim = values.ndim + shape = values.shape + + func = partial(pad, method=method, limit=limit, fill_value=fill_value, dtype=dtype) + + if ndim > 2: + if ndim == 3: + if axis == 0: + for n in range(shape[1]): + values[:,n] = func(values[:,n], axis=1) + else: + for n in range(shape[0]): + values[n] = func(values[n], axis=(1 if axis == 1 else 0)) + else: + if axis == 0: + for n in range(shape[1]): + values[:,n] = func(values[:,n], axis=0) + else: + for n in range(shape[0]): + values[n] = func(values[n], axis=axis-1) + + return values + transf = (lambda x: x) if axis == 0 else (lambda x: x.T) # reshape a 1 dim if needed - ndim = values.ndim if values.ndim == 1: if axis != 0: # pragma: no cover raise AssertionError("cannot interpolate on a ndim == 1 with " diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index d29673e96ecdd..ab240ea90a3f3 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1167,6 +1167,15 @@ def test_interp_alt_scipy(self): expected.ix[5,'A'] = 6.125 assert_frame_equal(result, expected) + try: + from scipy.interpolate import Akima1DInterpolator + except ImportError: + raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') + result = df.interpolate(method='akima') + expected.ix[2,'A'] = 3 + expected.ix[5,'A'] = 6 + assert_frame_equal(result, expected) + def test_interp_rowwise(self): df = DataFrame({0: [1, 2, np.nan, 4], 1: [2, 3, 4, np.nan], diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1f8bcf8c9879f..0c092f89c4090 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1454,20 
+1454,95 @@ def test_fillna(self): assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) + # Fill forward. + filled = self.panel.fillna(method='ffill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel.fillna(method='backfill', limit=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['ItemA'], + rounded['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 1. + filled = self.panel.fillna(method='backfill', axis=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 2, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel.fillna(method='backfill', axis=2) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. empty = self.panel.reindex(items=[]) filled = empty.fillna(0) assert_panel_equal(filled, empty) + # either method or value must be specified self.assertRaises(ValueError, self.panel.fillna) + # method and value can not both be specified self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') + # can't pass list or tuple, only scalar self.assertRaises(TypeError, self.panel.fillna, [1, 2]) self.assertRaises(TypeError, self.panel.fillna, (1, 2)) # limit not implemented when only value is specified p = Panel(np.random.randn(3,4,5)) p.iloc[0:2,0:2,0:2] = np.nan - self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 8395 + + # Forward fill along axis 0, interpolating values across DataFrames. 
+ filled = self.panel.fillna(method='ffill', axis=0) + nan_indexes = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + + # Values from ItemA are filled into ItemB. + assert_series_equal(filled['ItemB']['C'][nan_indexes], + self.panel['ItemA']['C'][nan_indexes]) + + # Backfill along axis 0. + filled = self.panel.fillna(method='backfill', axis=0) + + # The test data lacks values that can be backfilled on axis 0. + assert_panel_equal(filled, self.panel) + + # Reverse the panel and backfill along axis 0, to properly test + # backfill. + reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0])) + filled = reverse_panel.fillna(method='bfill', axis=0) + nan_indexes = reverse_panel['ItemB']['C'].index[ + reverse_panel['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['ItemB']['C'][nan_indexes], + reverse_panel['ItemA']['C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel.fillna(method='ffill', axis=0, limit=1) + a_nan = self.panel['ItemA']['C'].index[ + self.panel['ItemA']['C'].apply(np.isnan)] + b_nan = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemA remain unfilled in + # ItemC. 
+ self.assertTrue( + filled['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all()) + + # filling across axis 0 is not implemented for mixed dtypes + panel = self.panel.copy() + panel['str'] = 'foo' + self.assertRaises(NotImplementedError, lambda : panel.fillna(method='ffill', axis=0)) def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 3772d4b9c272b..dffb0ccc6effe 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -909,11 +909,106 @@ def test_sort_index(self): # assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): + # GH 8395 self.assertFalse(np.isfinite(self.panel4d.values).all()) filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - self.assertRaises(NotImplementedError, self.panel4d.fillna, method='pad') + filled = self.panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill')) + + panel4d = self.panel4d.copy() + panel4d['str'] = 'foo' + + filled = panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + panel4d['l1']['ItemA'].fillna(method='backfill')) + + # Fill forward. + filled = self.panel4d.fillna(method='ffill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel4d.fillna(method='backfill', limit=1) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel4d.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['l1']['ItemA'], + rounded['l1']['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 2. 
+ filled = self.panel4d.fillna(method='backfill', axis=2) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 3, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel4d.fillna(method='backfill', axis=3) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. + empty = self.panel4d.reindex(items=[]) + filled = empty.fillna(0) + assert_panel4d_equal(filled, empty) + + # either method or value must be specified + self.assertRaises(ValueError, self.panel4d.fillna) + # method and value can not both be specified + self.assertRaises(ValueError, self.panel4d.fillna, 5, method='ffill') + + # can't pass list or tuple, only scalar + self.assertRaises(TypeError, self.panel4d.fillna, [1, 2]) + self.assertRaises(TypeError, self.panel4d.fillna, (1, 2)) + + # limit not implemented when only value is specified + p = Panel4D(np.random.randn(3,4,5,6)) + p.iloc[0:2,0:2,0:2,0:2] = np.nan + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 8395 + + # Back fill along axis 0, interpolating values across Panels + filled = self.panel4d.fillna(method='bfill', axis=0) + nan_indexes = self.panel4d['l1']['ItemB']['C'].index[ + self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] + + # Values from ItemC are filled into ItemB. + assert_series_equal(filled['l1']['ItemB']['C'][nan_indexes], + self.panel4d['l1']['ItemC']['C'][nan_indexes]) + + # Forward fill along axis 0. + filled = self.panel4d.fillna(method='ffill', axis=0) + + # The test data lacks values that can be forward filled on axis 0. + assert_panel4d_equal(filled, self.panel4d) + + # Reverse the panel and forward fill along axis 0, to properly test + # forward fill. 
+ reverse_panel = self.panel4d.reindex_axis(reversed(self.panel4d.axes[0])) + filled = reverse_panel.fillna(method='ffill', axis=0) + nan_indexes = reverse_panel['l3']['ItemB']['C'].index[ + reverse_panel['l3']['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['l3']['ItemB']['C'][nan_indexes], + reverse_panel['l1']['ItemB']['C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel4d.fillna(method='bfill', axis=0, limit=1) + c_nan = self.panel4d['l1']['ItemC']['C'].index[ + self.panel4d['l1']['ItemC']['C'].apply(np.isnan)] + b_nan = self.panel4d['l1']['ItemB']['C'].index[ + self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemC remain unfilled in + # ItemA. + self.assertTrue( + filled['l1']['ItemA']['C'][b_nan.diff(c_nan)].apply(np.isnan).all()) def test_swapaxes(self): result = self.panel4d.swapaxes('labels', 'items')