From b6692a222eecc915cfc51a4eac93a51c4980c9be Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Sat, 1 Nov 2014 13:38:24 -0400 Subject: [PATCH 01/27] BUG: setitem fails on mixed-type Panel4D --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 954acb0f95159..048e4af20d02f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -355,7 +355,7 @@ def _setitem_with_indexer(self, indexer, value): # if we have a partial multiindex, then need to adjust the plane # indexer here if (len(labels) == 1 and - isinstance(self.obj[labels[0]].index, MultiIndex)): + isinstance(self.obj[labels[0]].axes[0], MultiIndex)): item = labels[0] obj = self.obj[item] index = obj.index From 6082542916416477f361483767249ffcd2c9450e Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Sat, 1 Nov 2014 14:26:28 -0400 Subject: [PATCH 02/27] added note to whatsnew --- doc/source/whatsnew/v0.15.1.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index e96adc2bd9559..4656c82b0099c 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -227,3 +227,5 @@ Bug Fixes - Fixed a bug where plotting a column ``y`` and specifying a label would mutate the index name of the original DataFrame (:issue:`8494`) - Bug in ``date_range`` where partially-specified dates would incorporate current date (:issue:`6961`) + +- Fixed a bug that prevented setting values in a mixed-type Panel4D From a2de1df086d3c38529e6845ad553d11277437eb2 Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Thu, 25 Sep 2014 21:24:31 -0700 Subject: [PATCH 03/27] BUG: fix panel fillna ignoring axis parameter --- pandas/core/generic.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 71668a73d9286..87a279d1c60aa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2244,18 +2244,16 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, Parameters ---------- - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed Series - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap value : scalar, dict, Series, or DataFrame Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of values specifying which value to use for each index (for a Series) or column (for a DataFrame). (values not in the dict/Series/DataFrame will not be filled). This value cannot be a list. - axis : {0, 1}, default 0 - * 0: fill column-by-column - * 1: fill row-by-row + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + axis : {0, 1, 2}, default 0 inplace : boolean, default False If True, fill in place. Note: this will modify any other views on this object, (e.g. a no-copy slice for a column in a @@ -2305,11 +2303,21 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, # 3d elif self.ndim == 3: + if axis == 0: + data = self.transpose(1, 0, 2) + result = dict([(col, s.fillna(method=method, value=value, + axis=0, limit=limit)) + for col, s in compat.iteritems(data)]) + result = self._constructor.from_dict(result) + result = result.transpose(1, 0, 2) + + else: + result = dict([(col, s.fillna(method=method, value=value, + axis=axis-1, limit=limit)) + for col, s in compat.iteritems(self)]) + result = self._constructor.from_dict(result) - # fill in 2d chunks - result = dict([(col, s.fillna(method=method, value=value)) - for col, s in compat.iteritems(self)]) - return self._constructor.from_dict(result).__finalize__(self) + return result.__finalize__(self) # 2d or less method = com._clean_fill_method(method) From 76998e968c309d16565e05a81ea8603d3087d4be Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Thu, 25 Sep 2014 22:14:45 -0700 Subject: [PATCH 04/27] added/fixed tests and re-ordered _is_mixed_type check --- pandas/core/generic.py | 19 ++++++++++--------- pandas/tests/test_panel.py | 7 +++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 87a279d1c60aa..9f30926e6b633 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2285,15 +2285,6 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, if value is None: if method is None: raise ValueError('must specify a fill method or value') - if self._is_mixed_type and axis == 1: - if inplace: - raise NotImplementedError() - result = self.T.fillna(method=method, limit=limit).T - - # need to downcast here because of all of the transposes - result._data = result._data.downcast() - - return result # > 3d if self.ndim > 3: @@ -2319,6 +2310,16 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, return result.__finalize__(self) + if self._is_mixed_type and axis == 1: + if inplace: + raise NotImplementedError() + result = self.T.fillna(method=method, limit=limit).T + + # need to downcast here because of all of the transposes + result._data = result._data.downcast() + + return result + # 2d or less method = com._clean_fill_method(method) new_data = self._data.interpolate(method=method, diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 14e4e32acae9f..0692e7b973849 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1345,17 +1345,20 @@ def test_fillna(self): filled = self.panel.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - filled = self.panel.fillna(method='backfill') + filled = self.panel.fillna(method='backfill', axis=1) assert_frame_equal(filled['ItemA'], self.panel['ItemA'].fillna(method='backfill')) panel = self.panel.copy() panel['str'] = 'foo' - filled = panel.fillna(method='backfill') + filled = panel.fillna(method='backfill', axis=1) assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) + filled = self.panel.fillna(method='ffill', axis=0) + self.assertEqual(filled.iloc[2,3,3], self.panel.iloc[0,3,3]) + empty = self.panel.reindex(items=[]) filled = empty.fillna(0) assert_panel_equal(filled, empty) From 158d11b50d18bcbbc23fea644ba5f024fbf40d00 Mon Sep 17 00:00:00 2001 From: stahlous Date: Fri, 26 Sep 2014 20:57:11 +0000 Subject: [PATCH 05/27] fill axis defaults to stat axis --- pandas/core/generic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f30926e6b633..61cba73fe468e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2237,7 +2237,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, #---------------------------------------------------------------------- # Filling NA's - def fillna(self, value=None, method=None, axis=0, inplace=False, + def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): """ Fill NA/NaN values using the specified method @@ -2253,7 +2253,8 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap - axis : {0, 1, 2}, default 0 + axis : {0, 1, 2}, defaults to the stat axis + The stat axis is 0 for Series and DataFrame and 1 for Panel inplace : boolean, default False If True, fill in place. Note: this will modify any other views on this object, (e.g. a no-copy slice for a column in a @@ -2278,7 +2279,10 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, 'you passed a "{0}"'.format(type(value).__name__)) self._consolidate_inplace() - axis = self._get_axis_number(axis) + if axis is None: + axis = self._get_axis_number(self._stat_axis_name) + else: + axis = self._get_axis_number(axis) method = com._clean_fill_method(method) from pandas import DataFrame From 9b1b9c286d845196fe0e016ce4d890694d329f92 Mon Sep 17 00:00:00 2001 From: stahlous Date: Fri, 26 Sep 2014 21:22:25 +0000 Subject: [PATCH 06/27] implement inplace; simplify a bit --- pandas/core/generic.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 61cba73fe468e..0351d00c76e4f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2300,19 +2300,24 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, elif self.ndim == 3: if axis == 0: data = self.transpose(1, 0, 2) - result = dict([(col, s.fillna(method=method, value=value, - axis=0, limit=limit)) - for col, s in compat.iteritems(data)]) - result = self._constructor.from_dict(result) - result = result.transpose(1, 0, 2) - + new_axis = 0 else: - result = dict([(col, s.fillna(method=method, value=value, - axis=axis-1, limit=limit)) - for col, s in compat.iteritems(self)]) - result = self._constructor.from_dict(result) + data = self + new_axis = axis - 1 + + result = dict([(col, s.fillna(value=value, method=method, + axis=new_axis, inplace=inplace, + limit=limit, downcast=downcast)) + for col, s in compat.iteritems(data)]) + result = self._constructor.from_dict(result) - return result.__finalize__(self) + if axis == 0: + result = result.transpose(1, 0 ,2) + if inplace: + self._update_inplace(self._data) + return + else: + return result.__finalize__(self) if self._is_mixed_type and axis == 1: if inplace: From 194eb87ef1d7dabd7bd9ff3a8339f466298ba958 Mon Sep 17 00:00:00 2001 From: stahlous Date: Fri, 26 Sep 2014 21:48:05 +0000 Subject: [PATCH 07/27] changed ffill and bfill default axis to None --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0351d00c76e4f..ccb1d19d2690d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2386,12 +2386,12 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, else: return self._constructor(new_data).__finalize__(self) - def ffill(self, axis=0, inplace=False, limit=None, downcast=None): + def ffill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='ffill')" return self.fillna(method='ffill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) - def bfill(self, axis=0, inplace=False, limit=None, downcast=None): + def bfill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='bfill')" return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) From 78d484eb385f7912f46463e4ca23554623ff2a49 Mon Sep 17 00:00:00 2001 From: stahlous Date: Fri, 26 Sep 2014 22:07:36 +0000 Subject: [PATCH 08/27] fixed inplace implementation --- pandas/core/generic.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ccb1d19d2690d..bcfd80f1f32d9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2309,14 +2309,11 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, axis=new_axis, inplace=inplace, limit=limit, downcast=downcast)) for col, s in compat.iteritems(data)]) - result = self._constructor.from_dict(result) - if axis == 0: - result = result.transpose(1, 0 ,2) - if inplace: - self._update_inplace(self._data) - return - else: + if not inplace: + result = self._constructor.from_dict(result) + if axis == 0: + result = result.transpose(1, 0 ,2) return result.__finalize__(self) if self._is_mixed_type and axis == 1: From 384d896c8ea2df2d7ca02e5c93f818bd603d9e14 Mon Sep 17 00:00:00 2001 From: stahlous Date: Sun, 28 Sep 2014 04:14:15 +0000 Subject: [PATCH 09/27] added in @staple's tests --- pandas/tests/test_panel.py | 67 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 0692e7b973849..1b0583d9f28ae 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1356,23 +1356,84 @@ def test_fillna(self): assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) - filled = self.panel.fillna(method='ffill', axis=0) - self.assertEqual(filled.iloc[2,3,3], self.panel.iloc[0,3,3]) + # Fill forward. + filled = self.panel.fillna(method='ffill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel.fillna(method='backfill', limit=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', limit=1)) + # With downcast. + rounded = self.panel.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['ItemA'], + rounded['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 1. + filled = self.panel.fillna(method='backfill', axis=1) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 2, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel.fillna(method='backfill', axis=2) + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. empty = self.panel.reindex(items=[]) filled = empty.fillna(0) assert_panel_equal(filled, empty) + # either method or value must be specified self.assertRaises(ValueError, self.panel.fillna) + # method and value can not both be specified self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') + # can't pass list or tuple, only scalar self.assertRaises(TypeError, self.panel.fillna, [1, 2]) self.assertRaises(TypeError, self.panel.fillna, (1, 2)) # limit not implemented when only value is specified p = Panel(np.random.randn(3,4,5)) p.iloc[0:2,0:2,0:2] = np.nan - self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # Forward fill along axis 0, interpolating values across DataFrames. + filled = self.panel.fillna(method='ffill', axis=0) + nan_indexes = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + # Values from ItemA are filled into ItemB. + assert_series_equal(filled['ItemB']['C'][nan_indexes], + self.panel['ItemA']['C'][nan_indexes]) + + # Backfill along axis 0. + filled = self.panel.fillna(method='backfill', axis=0) + # The test data lacks values that can be backfilled on axis 0. + assert_panel_equal(filled, self.panel) + # Reverse the panel and backfill along axis 0, to properly test + # backfill. + reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0])) + filled = reverse_panel.fillna(method='bfill', axis=0) + nan_indexes = reverse_panel['ItemB']['C'].index[ + reverse_panel['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['ItemB']['C'][nan_indexes], + reverse_panel['ItemA']['C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel.fillna(method='ffill', axis=0, limit=1) + a_nan = self.panel['ItemA']['C'].index[ + self.panel['ItemA']['C'].apply(np.isnan)] + b_nan = self.panel['ItemB']['C'].index[ + self.panel['ItemB']['C'].apply(np.isnan)] + # Cells that are nan in ItemB but not in ItemA remain unfilled in + # ItemC. + self.assertTrue( + filled['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all()) def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), From d55883f994c227506e151df9bc30542f04a8421b Mon Sep 17 00:00:00 2001 From: stahlous Date: Sun, 28 Sep 2014 17:47:19 +0000 Subject: [PATCH 10/27] return explicitly for inplace --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bcfd80f1f32d9..e96a5217de5e4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2315,6 +2315,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if axis == 0: result = result.transpose(1, 0 ,2) return result.__finalize__(self) + else: + return if self._is_mixed_type and axis == 1: if inplace: From a527cba1343da61bdeddde30de3b1d59afc77f09 Mon Sep 17 00:00:00 2001 From: stahlous Date: Sun, 28 Sep 2014 03:40:42 +0000 Subject: [PATCH 11/27] trying Panel.apply --- pandas/core/generic.py | 21 ++++++++++----------- pandas/tests/test_panel.py | 4 ++-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e96a5217de5e4..2b70108bbdd7d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2299,21 +2299,20 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, # 3d elif self.ndim == 3: if axis == 0: - data = self.transpose(1, 0, 2) - new_axis = 0 + new_axis = 1 + apply_axes = (0,1) else: - data = self new_axis = axis - 1 - - result = dict([(col, s.fillna(value=value, method=method, - axis=new_axis, inplace=inplace, - limit=limit, downcast=downcast)) - for col, s in compat.iteritems(data)]) + apply_axes = (1,2) + result = self.apply(lambda s: s.fillna(value=value, + method=method, + axis=new_axis, + inplace=inplace, + limit=limit, + downcast=downcast), + axis=apply_axes) if not inplace: - result = self._constructor.from_dict(result) - if axis == 0: - result = result.transpose(1, 0 ,2) return result.__finalize__(self) else: return diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1b0583d9f28ae..748d4427e808a 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1345,14 +1345,14 @@ def test_fillna(self): filled = self.panel.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - filled = self.panel.fillna(method='backfill', axis=1) + filled = self.panel.fillna(method='backfill') assert_frame_equal(filled['ItemA'], self.panel['ItemA'].fillna(method='backfill')) panel = self.panel.copy() panel['str'] = 'foo' - filled = panel.fillna(method='backfill', axis=1) + filled = panel.fillna(method='backfill') assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) From 507f665eea60f55a6c5be61302c7a370aef2410a Mon Sep 17 00:00:00 2001 From: stahlous Date: Mon, 29 Sep 2014 17:32:27 +0000 Subject: [PATCH 12/27] more apply work --- pandas/core/generic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2b70108bbdd7d..9ab0fa9994f35 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2300,10 +2300,10 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, elif self.ndim == 3: if axis == 0: new_axis = 1 - apply_axes = (0,1) + apply_axes = (0, 1) else: new_axis = axis - 1 - apply_axes = (1,2) + apply_axes = (1, 2) result = self.apply(lambda s: s.fillna(value=value, method=method, axis=new_axis, @@ -2311,8 +2311,9 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, limit=limit, downcast=downcast), axis=apply_axes) - if not inplace: + if axis == 0: + result = result.transpose(2, 1, 0) return result.__finalize__(self) else: return From 142ee9cc24a4e22b041681b23487a0e6fcb3bdd0 Mon Sep 17 00:00:00 2001 From: stahlous Date: Mon, 29 Sep 2014 17:39:18 +0000 Subject: [PATCH 13/27] merge in new tests --- pandas/tests/test_panel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 748d4427e808a..1b0583d9f28ae 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1345,14 +1345,14 @@ def test_fillna(self): filled = self.panel.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - filled = self.panel.fillna(method='backfill') + filled = self.panel.fillna(method='backfill', axis=1) assert_frame_equal(filled['ItemA'], self.panel['ItemA'].fillna(method='backfill')) panel = self.panel.copy() panel['str'] = 'foo' - filled = panel.fillna(method='backfill') + filled = panel.fillna(method='backfill', axis=1) assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) From 3e1272eded7abacf8ef51f0db3a1b9e095af59c9 Mon Sep 17 00:00:00 2001 From: stahlous Date: Mon, 29 Sep 2014 23:30:46 +0000 Subject: [PATCH 14/27] simplify return --- pandas/core/generic.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9ab0fa9994f35..141d599187f63 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2311,12 +2311,9 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, limit=limit, downcast=downcast), axis=apply_axes) - if not inplace: - if axis == 0: - result = result.transpose(2, 1, 0) - return result.__finalize__(self) - else: - return + if axis == 0: + result = result.transpose(2, 1, 0) + return result if not inplace else None if self._is_mixed_type and axis == 1: if inplace: From 1fab8a4f9f927da50b9c9021fc83aa24aac52637 Mon Sep 17 00:00:00 2001 From: stahlous Date: Tue, 30 Sep 2014 21:31:44 +0000 Subject: [PATCH 15/27] update tests with issue num and spacing before comments --- pandas/core/generic.py | 23 +++++++++++++---------- pandas/tests/test_panel.py | 10 ++++++++-- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 141d599187f63..46cb430f389c5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2290,31 +2290,35 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if method is None: raise ValueError('must specify a fill method or value') - # > 3d + # >3d if self.ndim > 3: raise NotImplementedError( 'Cannot fillna with a method for > 3dims' ) + # 3d - elif self.ndim == 3: + if self.ndim == 3: if axis == 0: - new_axis = 1 + fill_axis = 1 apply_axes = (0, 1) else: - new_axis = axis - 1 + fill_axis = axis - 1 apply_axes = (1, 2) - result = self.apply(lambda s: s.fillna(value=value, + + result = self.apply(lambda f: f.fillna(value=value, method=method, - axis=new_axis, - inplace=inplace, - limit=limit, - downcast=downcast), + axis=fill_axis, + inplace=inplace, + limit=limit, + downcast=downcast), axis=apply_axes) + if axis == 0: result = result.transpose(2, 1, 0) return result if not inplace else None + # 2d or less if self._is_mixed_type and axis == 1: if inplace: raise NotImplementedError() @@ -2325,7 +2329,6 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, return result - # 2d or less method = com._clean_fill_method(method) new_data = self._data.interpolate(method=method, axis=axis, diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1b0583d9f28ae..c70d2b599e038 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1345,14 +1345,14 @@ def test_fillna(self): filled = self.panel.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - filled = self.panel.fillna(method='backfill', axis=1) + filled = self.panel.fillna(method='backfill') assert_frame_equal(filled['ItemA'], self.panel['ItemA'].fillna(method='backfill')) panel = self.panel.copy() panel['str'] = 'foo' - filled = panel.fillna(method='backfill', axis=1) + filled = panel.fillna(method='backfill') assert_frame_equal(filled['ItemA'], panel['ItemA'].fillna(method='backfill')) @@ -1403,18 +1403,23 @@ def test_fillna(self): self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) def test_fillna_axis_0(self): + # GH 8395 + # Forward fill along axis 0, interpolating values across DataFrames. filled = self.panel.fillna(method='ffill', axis=0) nan_indexes = self.panel['ItemB']['C'].index[ self.panel['ItemB']['C'].apply(np.isnan)] + # Values from ItemA are filled into ItemB. assert_series_equal(filled['ItemB']['C'][nan_indexes], self.panel['ItemA']['C'][nan_indexes]) # Backfill along axis 0. filled = self.panel.fillna(method='backfill', axis=0) + # The test data lacks values that can be backfilled on axis 0. assert_panel_equal(filled, self.panel) + # Reverse the panel and backfill along axis 0, to properly test # backfill. reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0])) @@ -1430,6 +1435,7 @@ def test_fillna_axis_0(self): self.panel['ItemA']['C'].apply(np.isnan)] b_nan = self.panel['ItemB']['C'].index[ self.panel['ItemB']['C'].apply(np.isnan)] + # Cells that are nan in ItemB but not in ItemA remain unfilled in # ItemC. self.assertTrue( From 6f02fa57ae04b34416b2c482d8550d1526fe92c2 Mon Sep 17 00:00:00 2001 From: stahlous Date: Wed, 1 Oct 2014 03:31:38 +0000 Subject: [PATCH 16/27] added Panel4D.fillna suport and tests --- pandas/core/generic.py | 16 +++--- pandas/tests/test_panel4d.py | 98 +++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 46cb430f389c5..8a1f2ca494525 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2290,15 +2290,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if method is None: raise ValueError('must specify a fill method or value') - # >3d - if self.ndim > 3: + # >4d + if self.ndim > 4: raise NotImplementedError( - 'Cannot fillna with a method for > 3dims' + 'Cannot fillna with a method for >4 dims' ) - - # 3d - if self.ndim == 3: + # 3d or 4d + if self.ndim >= 3: if axis == 0: fill_axis = 1 apply_axes = (0, 1) @@ -2315,7 +2314,10 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, axis=apply_axes) if axis == 0: - result = result.transpose(2, 1, 0) + if self.ndim == 3: + result = result.transpose(2, 1, 0) + else: + result = result.transpose(1, 2, 0, 3) return result if not inplace else None # 2d or less diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index e88a8c3b2874c..9e442837b8d83 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -845,11 +845,107 @@ def test_sort_index(self): # assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): + # GH 8395 self.assertFalse(np.isfinite(self.panel4d.values).all()) filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - self.assertRaises(NotImplementedError, self.panel4d.fillna, method='pad') + filled = self.panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill')) + + panel4d = self.panel4d.copy() + panel4d['str'] = 'foo' + + filled = panel4d.fillna(method='backfill') + assert_frame_equal(filled['l1']['ItemA'], + panel4d['l1']['ItemA'].fillna(method='backfill')) + + # Fill forward. + filled = self.panel4d.fillna(method='ffill') + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='ffill')) + + # With limit. + filled = self.panel4d.fillna(method='backfill', limit=1) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', limit=1)) + + # With downcast. + rounded = self.panel4d.apply(lambda x: x.apply(np.round)) + filled = rounded.fillna(method='backfill', downcast='infer') + assert_frame_equal(filled['l1']['ItemA'], + rounded['l1']['ItemA'].fillna(method='backfill', downcast='infer')) + + # Now explicitly request axis 2. + filled = self.panel4d.fillna(method='backfill', axis=2) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=0)) + + # Fill along axis 3, equivalent to filling along axis 1 of each + # DataFrame. + filled = self.panel4d.fillna(method='backfill', axis=3) + assert_frame_equal(filled['l1']['ItemA'], + self.panel4d['l1']['ItemA'].fillna(method='backfill', axis=1)) + + # Fill an empty panel. + empty = self.panel4d.reindex(items=[]) + filled = empty.fillna(0) + assert_panel4d_equal(filled, empty) + + # either method or value must be specified + self.assertRaises(ValueError, self.panel4d.fillna) + # method and value can not both be specified + self.assertRaises(ValueError, self.panel4d.fillna, 5, method='ffill') + + # can't pass list or tuple, only scalar + self.assertRaises(TypeError, self.panel4d.fillna, [1, 2]) + self.assertRaises(TypeError, self.panel4d.fillna, (1, 2)) + + # limit not implemented when only value is specified + p = Panel4D(np.random.randn(3,4,5,6)) + p.iloc[0:2,0:2,0:2,0:2] = np.nan + self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) + + def test_fillna_axis_0(self): + # GH 8395 + + # Forward fill along axis 0, interpolating values across DataFrames. + filled = self.panel4d.fillna(method='ffill', axis=0) + nan_indexes = self.panel4d['l1']['ItemB']['C'].index[ + self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] + + # Values from ItemA are filled into ItemB. + assert_series_equal(filled['l1']['ItemB']['C'][nan_indexes], + self.panel4d['l1']['ItemA']['C'][nan_indexes]) + + # Backfill along axis 0. + filled = self.panel4d.fillna(method='backfill', axis=0) + + # The test data lacks values that can be backfilled on axis 0. + assert_panel4d_equal(filled, self.panel4d) + + # Reverse the panel and backfill along axis 0, to properly test + # backfill. + reverse_panel = self.panel4d.reindex_axis(reversed(self.panel4d.axes[0])) + filled = reverse_panel.fillna(method='bfill', axis=0) + nan_indexes = reverse_panel['l3']['ItemB']['C'].index[ + reverse_panel['l3']['ItemB']['C'].apply(np.isnan)] + assert_series_equal(filled['l1']['ItemB']['C'][nan_indexes], + reverse_panel['l3']['ItemB']['C'][nan_indexes]) + + # Fill along axis 0 with limit. + filled = self.panel4d.fillna(method='ffill', axis=0, limit=1) + a_nan = self.panel4d['l1']['ItemA']['C'].index[ + self.panel4d['l1']['ItemA']['C'].apply(np.isnan)] + b_nan = self.panel4d['l1']['ItemB']['C'].index[ + self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] + + # Cells that are nan in ItemB but not in ItemA remain unfilled in + # ItemC. + self.assertTrue( + filled['l1']['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all()) + def test_swapaxes(self): result = self.panel4d.swapaxes('labels', 'items') From 58d450b50db32f7830b612e718d68423925c658a Mon Sep 17 00:00:00 2001 From: stahlous Date: Wed, 1 Oct 2014 04:00:37 +0000 Subject: [PATCH 17/27] added updates to v0.15.0.txt and updated fillna docstring --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8a1f2ca494525..9c379615f162c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2253,8 +2253,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap - axis : {0, 1, 2}, defaults to the stat axis - The stat axis is 0 for Series and DataFrame and 1 for Panel + axis : {0, 1, 2, 3}, defaults to the stat axis + The stat axis is 0 for Series and DataFrame, 1 for Panel, and 2 for Panel4D inplace : boolean, default False If True, fill in place. Note: this will modify any other views on this object, (e.g. a no-copy slice for a column in a From fa3f34bf053e96c62df1629752136493bb378089 Mon Sep 17 00:00:00 2001 From: stahlous Date: Sun, 5 Oct 2014 05:53:47 +0000 Subject: [PATCH 18/27] more straightforward fillna method --- pandas/core/generic.py | 70 ++++++++++++------------------------------ 1 file changed, 19 insertions(+), 51 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9c379615f162c..f72ab09cd3ad2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2260,7 +2260,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, other views on this object, (e.g. a no-copy slice for a column in a DataFrame). limit : int, default None - Maximum size gap to forward or backward fill + eaxinum size gap to forward or backward fill downcast : dict, default is None a dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate @@ -2274,74 +2274,41 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, ------- filled : same type as caller """ - if isinstance(value, (list, tuple)): - raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{0}"'.format(type(value).__name__)) self._consolidate_inplace() if axis is None: axis = self._get_axis_number(self._stat_axis_name) else: axis = self._get_axis_number(axis) - method = com._clean_fill_method(method) from pandas import DataFrame if value is None: if method is None: raise ValueError('must specify a fill method or value') - # >4d - if self.ndim > 4: - raise NotImplementedError( - 'Cannot fillna with a method for >4 dims' - ) - - # 3d or 4d - if self.ndim >= 3: - if axis == 0: - fill_axis = 1 - apply_axes = (0, 1) - else: - fill_axis = axis - 1 - apply_axes = (1, 2) + method = com._clean_fill_method(method) - result = self.apply(lambda f: f.fillna(value=value, - method=method, - axis=fill_axis, - inplace=inplace, + if self.ndim > 1: + result = self.apply(lambda s: s.fillna(method=method, limit=limit, downcast=downcast), - axis=apply_axes) - - if axis == 0: - if self.ndim == 3: - result = result.transpose(2, 1, 0) - else: - result = result.transpose(1, 2, 0, 3) - return result if not inplace else None - - # 2d or less - if self._is_mixed_type and axis == 1: - if inplace: - raise NotImplementedError() - result = self.T.fillna(method=method, limit=limit).T - - # need to downcast here because of all of the transposes - result._data = result._data.downcast() - - return result - - method = com._clean_fill_method(method) - new_data = self._data.interpolate(method=method, - axis=axis, - limit=limit, - inplace=inplace, - coerce=True, - downcast=downcast) + axis=axis) + result = result.convert_objects(convert_numeric=True) + new_data = result._data + else: + new_data = self._data.interpolate(method=method, + axis=axis, + limit=limit, + coerce=True, + downcast=downcast) else: if method is not None: raise ValueError('cannot specify both a fill method and value') + if isinstance(value, (list, tuple)): + raise TypeError('"value" parameter must be a scalar or dict, but ' + 'you passed a "{0}"'.format(type(value).__name__)) + if len(self._get_axis(axis)) == 0: return self @@ -2385,7 +2352,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if inplace: self._update_inplace(new_data) else: - return self._constructor(new_data).__finalize__(self) + result = self._constructor(new_data).__finalize__(self) + return result def ffill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='ffill')" From 9548076b6be4a7de2db58dbbfa76d2c0b743330b Mon Sep 17 00:00:00 2001 From: stahlous Date: Sun, 5 Oct 2014 23:10:04 +0000 Subject: [PATCH 19/27] fix panel4d tests --- pandas/core/generic.py | 6 +++--- pandas/tests/test_panel4d.py | 32 ++++++++++++++++---------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f72ab09cd3ad2..bea0c0233675c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2260,9 +2260,9 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, other views on this object, (e.g. a no-copy slice for a column in a DataFrame). limit : int, default None - eaxinum size gap to forward or backward fill + Maximum size gap to forward or backward fill downcast : dict, default is None - a dict of item->dtype of what to downcast if possible, + A dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate equal type (e.g. float64 to int64 if possible) @@ -2277,7 +2277,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, self._consolidate_inplace() if axis is None: - axis = self._get_axis_number(self._stat_axis_name) + axis = self._stat_axis_number else: axis = self._get_axis_number(axis) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 9e442837b8d83..4837b94fe3d53 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -910,41 +910,41 @@ def test_fillna(self): def test_fillna_axis_0(self): # GH 8395 - # Forward fill along axis 0, interpolating values across DataFrames. - filled = self.panel4d.fillna(method='ffill', axis=0) + # Back fill along axis 0, interpolating values across Panels + filled = self.panel4d.fillna(method='bfill', axis=0) nan_indexes = self.panel4d['l1']['ItemB']['C'].index[ self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] - # Values from ItemA are filled into ItemB. + # Values from ItemC are filled into ItemB. assert_series_equal(filled['l1']['ItemB']['C'][nan_indexes], - self.panel4d['l1']['ItemA']['C'][nan_indexes]) + self.panel4d['l1']['ItemC']['C'][nan_indexes]) - # Backfill along axis 0. - filled = self.panel4d.fillna(method='backfill', axis=0) + # Forward fill along axis 0. + filled = self.panel4d.fillna(method='ffill', axis=0) # The test data lacks values that can be backfilled on axis 0. assert_panel4d_equal(filled, self.panel4d) # Reverse the panel and backfill along axis 0, to properly test - # backfill. + # forward fill. reverse_panel = self.panel4d.reindex_axis(reversed(self.panel4d.axes[0])) - filled = reverse_panel.fillna(method='bfill', axis=0) + filled = reverse_panel.fillna(method='ffill', axis=0) nan_indexes = reverse_panel['l3']['ItemB']['C'].index[ reverse_panel['l3']['ItemB']['C'].apply(np.isnan)] - assert_series_equal(filled['l1']['ItemB']['C'][nan_indexes], - reverse_panel['l3']['ItemB']['C'][nan_indexes]) + assert_series_equal(filled['l3']['ItemB']['C'][nan_indexes], + reverse_panel['l1']['ItemB']['C'][nan_indexes]) # Fill along axis 0 with limit. - filled = self.panel4d.fillna(method='ffill', axis=0, limit=1) - a_nan = self.panel4d['l1']['ItemA']['C'].index[ - self.panel4d['l1']['ItemA']['C'].apply(np.isnan)] + filled = self.panel4d.fillna(method='bfill', axis=0, limit=1) + c_nan = self.panel4d['l1']['ItemC']['C'].index[ + self.panel4d['l1']['ItemC']['C'].apply(np.isnan)] b_nan = self.panel4d['l1']['ItemB']['C'].index[ self.panel4d['l1']['ItemB']['C'].apply(np.isnan)] - # Cells that are nan in ItemB but not in ItemA remain unfilled in - # ItemC. + # Cells that are nan in ItemB but not in ItemC remain unfilled in + # ItemA. self.assertTrue( - filled['l1']['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all()) + filled['l1']['ItemA']['C'][b_nan.diff(c_nan)].apply(np.isnan).all()) def test_swapaxes(self): From 40a757ccc0b00c5c3c0ab6440a50304cc7616c32 Mon Sep 17 00:00:00 2001 From: stahlous Date: Sun, 12 Oct 2014 01:50:13 +0000 Subject: [PATCH 20/27] remove 'convert_numeric=True' --- pandas/core/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bea0c0233675c..3f02a1fa6dfbf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2293,8 +2293,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, limit=limit, downcast=downcast), axis=axis) - result = result.convert_objects(convert_numeric=True) - new_data = result._data + new_data = result.convert_objects()._data else: new_data = self._data.interpolate(method=method, axis=axis, From b1ecb43a51908f35df5343bf2056f356a7b35203 Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Mon, 13 Oct 2014 13:49:57 -0400 Subject: [PATCH 21/27] implement inplace in memory efficient way --- pandas/core/generic.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3f02a1fa6dfbf..e033c4d9ed2ff 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2288,18 +2288,24 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, method = com._clean_fill_method(method) + def interp_func(series): + return series._data.interpolate(method=method, + inplace=inplace, + limit=limit, + coerce=True, + downcast=downcast) if self.ndim > 1: - result = self.apply(lambda s: s.fillna(method=method, - limit=limit, - downcast=downcast), - axis=axis) - new_data = result.convert_objects()._data + if inplace: + self.apply(interp_func, axis=axis) + new_data = self._data + else: + result = self.apply(lambda s: s.fillna(method=method, + limit=limit, + downcast=downcast), + axis=axis) + new_data = result.convert_objects()._data else: - new_data = self._data.interpolate(method=method, - axis=axis, - limit=limit, - coerce=True, - downcast=downcast) + new_data = interp_func(self) else: if method is not None: raise ValueError('cannot specify both a fill method and value') From a14706109d097660fca3b1dfda8dcf1987c78380 Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Mon, 13 Oct 2014 18:44:31 -0400 Subject: [PATCH 22/27] invoke convert_objects with copy=False --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e033c4d9ed2ff..b3fa664d1aa10 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2303,7 +2303,7 @@ def interp_func(series): limit=limit, downcast=downcast), axis=axis) - new_data = result.convert_objects()._data + new_data = result.convert_objects(copy=False)._data else: new_data = interp_func(self) else: From adb9379b839bae9c15332745388619e7ebbea401 Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Wed, 15 Oct 2014 02:07:33 -0400 Subject: [PATCH 23/27] implement categorical preservation --- pandas/core/common.py | 18 ++++++++++++++++++ pandas/core/generic.py | 9 ++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 51464e1809e75..19cebeed9a4f3 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -3118,3 +3118,21 @@ def _maybe_match_name(a, b): if a_name == b_name: return a_name return None + +def _get_cats(f): + from pandas.core.common import CategoricalDtype + if f._stat_axis_number == 0: + return dict((pt, f[pt].cat) for pt in f if + isinstance(f[pt].dtype, CategoricalDtype)) + else: + return dict((pt, _get_cats(f[pt])) for pt in f) + +def _restore_cats(f, cats): + from pandas.core.categorical import Categorical + for pt, sub in cats.items(): + if isinstance(sub, dict): + _restore_cats(f[pt], sub) + else: + f[pt] = Categorical(f[pt], + categories=sub.categories, + ordered=sub.ordered) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b3fa664d1aa10..01edc440a988f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2299,11 +2299,18 @@ def interp_func(series): self.apply(interp_func, axis=axis) new_data = self._data else: + cats = None + if self._is_mixed_type: + cats = com._get_cats(self) + result = self.apply(lambda s: s.fillna(method=method, limit=limit, downcast=downcast), axis=axis) - new_data = result.convert_objects(copy=False)._data + result = result.convert_objects(copy=False) + if cats: + com._restore_cats(result, cats) + new_data = result._data else: new_data = interp_func(self) else: From 36d0dd86f7f6c5519d5ac3431be53e631cf0e8d5 Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Wed, 15 Oct 2014 23:47:46 -0400 Subject: [PATCH 24/27] rework categories implementation --- pandas/core/common.py | 18 ------------------ pandas/core/generic.py | 24 +++++++++++++++++++++--- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 19cebeed9a4f3..51464e1809e75 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -3118,21 +3118,3 @@ def _maybe_match_name(a, b): if a_name == b_name: return a_name return None - -def _get_cats(f): - from pandas.core.common import CategoricalDtype - if f._stat_axis_number == 0: - return dict((pt, f[pt].cat) for pt in f if - isinstance(f[pt].dtype, CategoricalDtype)) - else: - return dict((pt, _get_cats(f[pt])) for pt in f) - -def _restore_cats(f, cats): - from pandas.core.categorical import Categorical - for pt, sub in cats.items(): - if isinstance(sub, dict): - _restore_cats(f[pt], sub) - else: - f[pt] = Categorical(f[pt], - categories=sub.categories, - ordered=sub.ordered) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 01edc440a988f..f6fc42dc0b031 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -21,10 +21,12 @@ from pandas.core.common import (isnull, notnull, is_list_like, _values_from_object, _maybe_promote, _maybe_box_datetimelike, ABCSeries, - SettingWithCopyError, SettingWithCopyWarning) + SettingWithCopyError, SettingWithCopyWarning, + CategoricalDtype) import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.core import config +from pandas.core.categorical import Categorical # goal is to be able to define the docs close to function, while still being # able to share @@ -2301,7 +2303,7 @@ def interp_func(series): else: cats = None if self._is_mixed_type: - cats = com._get_cats(self) + cats = self._get_cats() result = self.apply(lambda s: s.fillna(method=method, limit=limit, @@ -2309,7 +2311,7 @@ def interp_func(series): axis=axis) result = result.convert_objects(copy=False) if cats: - com._restore_cats(result, cats) + result._restore_cats(cats) new_data = result._data else: new_data = interp_func(self) @@ -2367,6 +2369,22 @@ def interp_func(series): result = self._constructor(new_data).__finalize__(self) return result + def _get_cats(self): + if self._stat_axis_number == 0: + return dict((pt, self[pt].cat) for pt in self if + isinstance(self[pt].dtype, CategoricalDtype)) + else: + return dict((pt, self[pt]._get_cats()) for pt in self) + + def _restore_cats(self, cats): + for pt, sub in cats.items(): + if isinstance(sub, dict): + self[pt]._restore_cats(sub) + else: + self[pt] = Categorical(self[pt], + categories=sub.categories, + ordered=sub.ordered) + def ffill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='ffill')" return self.fillna(method='ffill', axis=axis, inplace=inplace, From 3eef736cfd886929e125f19b941975caef034b6f Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Thu, 16 Oct 2014 00:12:09 -0400 Subject: [PATCH 25/27] minor cleanup --- pandas/core/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f6fc42dc0b031..3eee4fb5c40b4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2366,8 +2366,7 @@ def interp_func(series): if inplace: self._update_inplace(new_data) else: - result = self._constructor(new_data).__finalize__(self) - return result + return self._constructor(new_data).__finalize__(self) def _get_cats(self): if self._stat_axis_number == 0: From cfab77ed0cc33200f48acd693263da4d3f7cb345 Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Fri, 31 Oct 2014 19:25:45 -0400 Subject: [PATCH 26/27] another way of going about it --- pandas/core/generic.py | 62 ++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 41 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3eee4fb5c40b4..a1ddff009006d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -28,6 +28,8 @@ from pandas.core import config from pandas.core.categorical import Categorical +from itertools import product + # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = dict() @@ -2290,31 +2292,25 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, method = com._clean_fill_method(method) - def interp_func(series): - return series._data.interpolate(method=method, - inplace=inplace, - limit=limit, - coerce=True, - downcast=downcast) - if self.ndim > 1: - if inplace: - self.apply(interp_func, axis=axis) - new_data = self._data - else: - cats = None - if self._is_mixed_type: - cats = self._get_cats() - - result = self.apply(lambda s: s.fillna(method=method, - limit=limit, - downcast=downcast), - axis=axis) - result = result.convert_objects(copy=False) - if cats: - result._restore_cats(cats) - new_data = result._data - else: - new_data = interp_func(self) + off_axes = list(range(self.ndim)) + off_axes.remove(axis) + expanded = [list(range(self.shape[x])) for x in off_axes] + frame = self if inplace else self.copy() + for axes_prod in product(*expanded): + slicer = list(axes_prod) + slicer.insert(axis, slice(None)) + sl = tuple(slicer) + piece = frame.iloc[sl] + new_data = piece._data.interpolate(method=method, + limit=limit, + inplace=True, + coerce=True) + frame.iloc[sl] = piece._constructor(new_data) + + new_data = frame._data + if downcast: + new_data = new_data.downcast(dtypes=downcast) + else: if method is not None: raise ValueError('cannot specify both a fill method and value') @@ -2368,22 +2364,6 @@ def interp_func(series): else: return self._constructor(new_data).__finalize__(self) - def _get_cats(self): - if self._stat_axis_number == 0: - return dict((pt, self[pt].cat) for pt in self if - isinstance(self[pt].dtype, CategoricalDtype)) - else: - return dict((pt, self[pt]._get_cats()) for pt in self) - - def _restore_cats(self, cats): - for pt, sub in cats.items(): - if isinstance(sub, dict): - self[pt]._restore_cats(sub) - else: - self[pt] = Categorical(self[pt], - categories=sub.categories, - ordered=sub.ordered) - def ffill(self, axis=None, inplace=False, limit=None, downcast=None): "Synonym for NDFrame.fillna(method='ffill')" return self.fillna(method='ffill', axis=axis, inplace=inplace, From a6705715e555756af51672c584dc645a0bc356da Mon Sep 17 00:00:00 2001 From: Nick Stahl Date: Sat, 1 Nov 2014 15:29:01 -0400 Subject: [PATCH 27/27] more fixing --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 048e4af20d02f..e6f91d2ae6f44 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -421,7 +421,7 @@ def can_do_equal_len(): l = len(value) item = labels[0] - index = self.obj[item].index + index = self.obj[item].axes[0] # equal len list/ndarray if len(index) == l: