diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index e96adc2bd9559..4656c82b0099c 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -227,3 +227,5 @@ Bug Fixes - Fixed a bug where plotting a column ``y`` and specifying a label would mutate the index name of the original DataFrame (:issue:`8494`) - Bug in ``date_range`` where partially-specified dates would incorporate current date (:issue:`6961`) + +- Fixed a bug that prevented setting values in a mixed-type Panel4D diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 71668a73d9286..a1ddff009006d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -21,10 +21,14 @@ from pandas.core.common import (isnull, notnull, is_list_like, _values_from_object, _maybe_promote, _maybe_box_datetimelike, ABCSeries, - SettingWithCopyError, SettingWithCopyWarning) + SettingWithCopyError, SettingWithCopyWarning, + CategoricalDtype) import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.core import config +from pandas.core.categorical import Categorical + +from itertools import product # goal is to be able to define the docs close to function, while still being # able to share @@ -2237,25 +2241,24 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, #---------------------------------------------------------------------- # Filling NA's - def fillna(self, value=None, method=None, axis=0, inplace=False, + def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): """ Fill NA/NaN values using the specified method Parameters ---------- - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed Series - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap value : scalar, dict, Series, or DataFrame 
Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of values specifying which value to use for each index (for a Series) or column (for a DataFrame). (values not in the dict/Series/DataFrame will not be filled). This value cannot be a list. - axis : {0, 1}, default 0 - * 0: fill column-by-column - * 1: fill row-by-row + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + axis : {0, 1, 2, 3}, defaults to the stat axis + The stat axis is 0 for Series and DataFrame, 1 for Panel, and 2 for Panel4D inplace : boolean, default False If True, fill in place. Note: this will modify any other views on this object, (e.g. a no-copy slice for a column in a @@ -2263,7 +2266,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, limit : int, default None Maximum size gap to forward or backward fill downcast : dict, default is None - a dict of item->dtype of what to downcast if possible, + A dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate equal type (e.g. 
float64 to int64 if possible) @@ -2275,54 +2278,47 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, ------- filled : same type as caller """ - if isinstance(value, (list, tuple)): - raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{0}"'.format(type(value).__name__)) self._consolidate_inplace() - axis = self._get_axis_number(axis) - method = com._clean_fill_method(method) + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) from pandas import DataFrame if value is None: if method is None: raise ValueError('must specify a fill method or value') - if self._is_mixed_type and axis == 1: - if inplace: - raise NotImplementedError() - result = self.T.fillna(method=method, limit=limit).T - - # need to downcast here because of all of the transposes - result._data = result._data.downcast() - return result - - # > 3d - if self.ndim > 3: - raise NotImplementedError( - 'Cannot fillna with a method for > 3dims' - ) - - # 3d - elif self.ndim == 3: + method = com._clean_fill_method(method) - # fill in 2d chunks - result = dict([(col, s.fillna(method=method, value=value)) - for col, s in compat.iteritems(self)]) - return self._constructor.from_dict(result).__finalize__(self) + off_axes = list(range(self.ndim)) + off_axes.remove(axis) + expanded = [list(range(self.shape[x])) for x in off_axes] + frame = self if inplace else self.copy() + for axes_prod in product(*expanded): + slicer = list(axes_prod) + slicer.insert(axis, slice(None)) + sl = tuple(slicer) + piece = frame.iloc[sl] + new_data = piece._data.interpolate(method=method, + limit=limit, + inplace=True, + coerce=True) + frame.iloc[sl] = piece._constructor(new_data) + + new_data = frame._data + if downcast: + new_data = new_data.downcast(dtypes=downcast) - # 2d or less - method = com._clean_fill_method(method) - new_data = self._data.interpolate(method=method, - axis=axis, - limit=limit, - inplace=inplace, - coerce=True, - 
downcast=downcast) else: if method is not None: raise ValueError('cannot specify both a fill method and value') + if isinstance(value, (list, tuple)): + raise TypeError('"value" parameter must be a scalar or dict, but ' + 'you passed a "{0}"'.format(type(value).__name__)) + if len(self._get_axis(axis)) == 0: return self @@ -2368,12 +2364,12 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, else: return self._constructor(new_data).__finalize__(self) - def ffill(self, axis=0, inplace=False, limit=None, downcast=None): + def ffill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='ffill')" return self.fillna(method='ffill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) - def bfill(self, axis=0, inplace=False, limit=None, downcast=None): + def bfill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='bfill')" return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 954acb0f95159..e6f91d2ae6f44 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -355,7 +355,7 @@ def _setitem_with_indexer(self, indexer, value): # if we have a partial multiindex, then need to adjust the plane # indexer here if (len(labels) == 1 and - isinstance(self.obj[labels[0]].index, MultiIndex)): + isinstance(self.obj[labels[0]].axes[0], MultiIndex)): item = labels[0] obj = self.obj[item] index = obj.index @@ -421,7 +421,7 @@ def can_do_equal_len(): l = len(value) item = labels[0] - index = self.obj[item].index + index = self.obj[item].axes[0] # equal len list/ndarray if len(index) == l: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 14e4e32acae9f..c70d2b599e038 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1356,20 +1356,90 @@ def test_fillna(self): assert_frame_equal(filled['ItemA'], 
panel['ItemA'].fillna(method='backfill')) + # Fill forward. + filled = self.panel.fillna(method='ffill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel.fillna(method='backfill', limit=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['ItemA'], + rounded['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 1. + filled = self.panel.fillna(method='backfill', axis=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 2, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel.fillna(method='backfill', axis=2) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. empty = self.panel.reindex(items=[]) filled = empty.fillna(0) assert_panel_equal(filled, empty) + # either method or value must be specified self.assertRaises(ValueError, self.panel.fillna) + # method and value can not both be specified self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') + # can't pass list or tuple, only scalar self.assertRaises(TypeError, self.panel.fillna, [1, 2]) self.assertRaises(TypeError, self.panel.fillna, (1, 2)) # limit not implemented when only value is specified p = Panel(np.random.randn(3,4,5)) p.iloc[0:2,0:2,0:2] = np.nan - self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 8395 + + # Forward fill along axis 0, interpolating values across DataFrames. 
+ filled = self.panel.fillna(method='ffill', axis=0) + nan_indexes = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + + # Values from ItemA are filled into ItemB. + assert_series_equal(filled['ItemB']['C'][nan_indexes], + self.panel['ItemA']['C'][nan_indexes]) + + # Backfill along axis 0. + filled = self.panel.fillna(method='backfill', axis=0) + + # The test data lacks values that can be backfilled on axis 0. + assert_panel_equal(filled, self.panel) + + # Reverse the panel and backfill along axis 0, to properly test + # backfill. + reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0])) + filled = reverse_panel.fillna(method='bfill', axis=0) + nan_indexes = reverse_panel['ItemB']['C'].index[ + reverse_panel['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['ItemB']['C'][nan_indexes], + reverse_panel['ItemA']['C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel.fillna(method='ffill', axis=0, limit=1) + a_nan = self.panel['ItemA']['C'].index[ + self.panel['ItemA']['C'].apply(np.isnan)] + b_nan = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemA remain unfilled in + # ItemC. 
+ self.assertTrue( + filled['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all()) def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index e88a8c3b2874c..4837b94fe3d53 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -845,11 +845,107 @@ def test_sort_index(self): # assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): + # GH 8395 self.assertFalse(np.isfinite(self.panel4d.values).all()) filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - self.assertRaises(NotImplementedError, self.panel4d.fillna, method='pad') + filled = self.panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill')) + + panel4d = self.panel4d.copy() + panel4d['str'] = 'foo' + + filled = panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + panel4d['l1']['ItemA'].fillna(method='backfill')) + + # Fill forward. + filled = self.panel4d.fillna(method='ffill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel4d.fillna(method='backfill', limit=1) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel4d.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['l1']['ItemA'], + rounded['l1']['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 2. + filled = self.panel4d.fillna(method='backfill', axis=2) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 3, equivalent to filling along axis 1 of each + # DataFrame. 
+ filled = self.panel4d.fillna(method='backfill', axis=3) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. + empty = self.panel4d.reindex(items=[]) + filled = empty.fillna(0) + assert_panel4d_equal(filled, empty) + + # either method or value must be specified + self.assertRaises(ValueError, self.panel4d.fillna) + # method and value can not both be specified + self.assertRaises(ValueError, self.panel4d.fillna, 5, method='ffill') + + # can't pass list or tuple, only scalar + self.assertRaises(TypeError, self.panel4d.fillna, [1, 2]) + self.assertRaises(TypeError, self.panel4d.fillna, (1, 2)) + + # limit not implemented when only value is specified + p = Panel4D(np.random.randn(3,4,5,6)) + p.iloc[0:2,0:2,0:2,0:2] = np.nan + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 8395 + + # Back fill along axis 0, interpolating values across Panels + filled = self.panel4d.fillna(method='bfill', axis=0) + nan_indexes = self.panel4d['l1']['ItemB']['C'].index[ + self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] + + # Values from ItemC are filled into ItemB. + assert_series_equal(filled['l1']['ItemB']['C'][nan_indexes], + self.panel4d['l1']['ItemC']['C'][nan_indexes]) + + # Forward fill along axis 0. + filled = self.panel4d.fillna(method='ffill', axis=0) + + # The test data lacks values that can be forward filled on axis 0. + assert_panel4d_equal(filled, self.panel4d) + + # Reverse the panel and forward fill along axis 0, to properly test + # forward fill. + reverse_panel = self.panel4d.reindex_axis(reversed(self.panel4d.axes[0])) + filled = reverse_panel.fillna(method='ffill', axis=0) + nan_indexes = reverse_panel['l3']['ItemB']['C'].index[ + reverse_panel['l3']['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['l3']['ItemB']['C'][nan_indexes], + reverse_panel['l1']['ItemB']['C'][nan_indexes]) + + # Fill along axis 0 with limit. 
+ filled = self.panel4d.fillna(method='bfill', axis=0, limit=1) + c_nan = self.panel4d['l1']['ItemC']['C'].index[ + self.panel4d['l1']['ItemC']['C'].apply(np.isnan)] + b_nan = self.panel4d['l1']['ItemB']['C'].index[ + self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemC remain unfilled in + # ItemA. + self.assertTrue( + filled['l1']['ItemA']['C'][b_nan.diff(c_nan)].apply(np.isnan).all()) + def test_swapaxes(self): result = self.panel4d.swapaxes('labels', 'items')