-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: fix Panel.fillna() ignoring axis parameter (re-submission) #11445
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
from pandas.tseries.period import PeriodIndex | ||
from pandas.core.internals import BlockManager | ||
import pandas.core.common as com | ||
import pandas.core.missing as mis | ||
import pandas.core.missing as missing | ||
import pandas.core.datetools as datetools | ||
from pandas import compat | ||
from pandas.compat import map, zip, lrange, string_types, isidentifier | ||
|
@@ -51,7 +51,7 @@ def _single_replace(self, to_replace, method, inplace, limit): | |
|
||
orig_dtype = self.dtype | ||
result = self if inplace else self.copy() | ||
fill_f = mis._get_fill_func(method) | ||
fill_f = missing._get_fill_func(method) | ||
|
||
mask = com.mask_missing(result.values, to_replace) | ||
values = fill_f(result.values, limit=limit, mask=mask) | ||
|
@@ -1929,7 +1929,7 @@ def reindex(self, *args, **kwargs): | |
|
||
# construct the args | ||
axes, kwargs = self._construct_axes_from_arguments(args, kwargs) | ||
method = mis._clean_reindex_fill_method(kwargs.pop('method', None)) | ||
method = missing._clean_reindex_fill_method(kwargs.pop('method', None)) | ||
level = kwargs.pop('level', None) | ||
copy = kwargs.pop('copy', True) | ||
limit = kwargs.pop('limit', None) | ||
|
@@ -2042,7 +2042,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, | |
|
||
axis_name = self._get_axis_name(axis) | ||
axis_values = self._get_axis(axis_name) | ||
method = mis._clean_reindex_fill_method(method) | ||
method = missing._clean_reindex_fill_method(method) | ||
new_index, indexer = axis_values.reindex(labels, method, level, | ||
limit=limit) | ||
return self._reindex_with_indexers( | ||
|
@@ -2774,40 +2774,28 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, | |
# set the default here, so functions examining the signaure | ||
# can detect if something was set (e.g. in groupby) (GH9221) | ||
if axis is None: | ||
axis = 0 | ||
axis = self._stat_axis_name | ||
axis = self._get_axis_number(axis) | ||
method = mis._clean_fill_method(method) | ||
method = missing._clean_fill_method(method) | ||
|
||
from pandas import DataFrame | ||
if value is None: | ||
if method is None: | ||
raise ValueError('must specify a fill method or value') | ||
if self._is_mixed_type and axis == 1: | ||
if self._is_mixed_type: | ||
if (self.ndim > 2) and (axis == 0): | ||
raise NotImplementedError('cannot fill across axis 0 for mixed dtypes') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. too many if statements here, this should be a bit more general I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The way filling across mixed-dtype blocks for 2D frames is done is by transposing axes. This is actually what I proposed doing in my original PR from last year to extend filling across higher dimensionality frames, but you rejected that idea. I could fill 2D frames using the same general method I'm proposing here, but that will result in in some backwards incompatible behavior and users that depend on being able to fill across mixed-type blocks for 2D would no longer be able to do so. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't recall that. my point is that this should be much simpler logic. Too many if statements here (and in the internals). |
||
if inplace: | ||
raise NotImplementedError() | ||
result = self.T.fillna(method=method, limit=limit).T | ||
|
||
# need to downcast here because of all of the transposes | ||
result._data = result._data.downcast() | ||
|
||
return result | ||
|
||
# > 3d | ||
if self.ndim > 3: | ||
raise NotImplementedError( | ||
'Cannot fillna with a method for > 3dims' | ||
) | ||
raise NotImplementedError('cannot fill inplace for mixed dtypes') | ||
elif (self.ndim == 2) and (axis == 1): | ||
result = self.T.fillna(method=method, limit=limit).T | ||
|
||
# 3d | ||
elif self.ndim == 3: | ||
# need to downcast here because of all of the transposes | ||
result._data = result._data.downcast() | ||
|
||
# fill in 2d chunks | ||
result = dict([(col, s.fillna(method=method, value=value)) | ||
for col, s in compat.iteritems(self)]) | ||
return self._constructor.from_dict(result).__finalize__(self) | ||
return result | ||
|
||
# 2d or less | ||
method = mis._clean_fill_method(method) | ||
method = missing._clean_fill_method(method) | ||
new_data = self._data.interpolate(method=method, | ||
axis=axis, | ||
limit=limit, | ||
|
@@ -3750,7 +3738,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, | |
fill_value=None, method=None, limit=None, fill_axis=0, | ||
broadcast_axis=None): | ||
from pandas import DataFrame, Series | ||
method = mis._clean_fill_method(method) | ||
method = missing._clean_fill_method(method) | ||
|
||
if broadcast_axis == 1 and self.ndim != other.ndim: | ||
if isinstance(self, Series): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,9 +49,9 @@ def _clean_interp_method(method, **kwargs): | |
return method | ||
|
||
|
||
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, | ||
limit_direction='forward', | ||
fill_value=None, bounds_error=False, order=None, **kwargs): | ||
def interpolate(xvalues, yvalues, method='linear', limit=None, | ||
limit_direction='forward', | ||
fill_value=None, bounds_error=False, order=None, **kwargs): | ||
""" | ||
Logic for the 1-d interpolation. The result should be 1-d, inputs | ||
xvalues and yvalues will each be 1-d arrays of the same length. | ||
|
@@ -219,20 +219,42 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, | |
return new_y | ||
|
||
|
||
def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): | ||
""" perform an actual interpolation of values, values will be make 2-d if | ||
needed fills inplace, returns the result | ||
def pad(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): | ||
""" | ||
Perform an actual interpolation of values. 1-d values will be made 2-d temporarily. | ||
Returns the result | ||
""" | ||
|
||
transf = (lambda x: x) if axis == 0 else (lambda x: x.T) | ||
ndim = values.ndim | ||
|
||
# reshape a 1 dim if needed | ||
ndim = values.ndim | ||
if values.ndim == 1: | ||
if ndim == 1: | ||
if axis != 0: # pragma: no cover | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be handled higher up |
||
raise AssertionError("cannot interpolate on a ndim == 1 with " | ||
"axis != 0") | ||
values = values.reshape(tuple((1,) + values.shape)) | ||
# recursively slice n-dimension frames (n>2) into (n-1)-dimension frames | ||
elif ndim > 2: | ||
slice_axis = 1 if axis == 0 else 0 | ||
slicer = [slice(None)]*ndim | ||
|
||
if ndim == 3: | ||
axis = 0 if (axis > 1) else 1 | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you are changing the meaning of axis here. it is exactly |
||
axis = axis - 1 if (axis > 0) else 0 | ||
|
||
for n in range(values.shape[slice_axis]): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comments as before |
||
slicer[slice_axis] = n | ||
values[slicer] = pad(values[slicer], | ||
method=method, | ||
axis=axis, | ||
limit=limit, | ||
fill_value=fill_value, | ||
dtype=dtype) | ||
|
||
return values | ||
|
||
transf = (lambda x: x) if axis == 0 else (lambda x: x.T) | ||
|
||
if fill_value is None: | ||
mask = None | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1454,20 +1454,98 @@ def test_fillna(self): | |
assert_frame_equal(filled['ItemA'], | ||
panel['ItemA'].fillna(method='backfill')) | ||
|
||
# GH 11445 | ||
# Fill forward. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add the issue number here as a comment. |
||
filled = self.panel.fillna(method='ffill') | ||
assert_frame_equal(filled['ItemA'], | ||
self.panel['ItemA'].fillna(method='ffill')) | ||
|
||
# With limit. | ||
filled = self.panel.fillna(method='backfill', limit=1) | ||
assert_frame_equal(filled['ItemA'], | ||
self.panel['ItemA'].fillna(method='backfill', limit=1)) | ||
|
||
# With downcast. | ||
rounded = self.panel.apply(lambda x: x.apply(np.round)) | ||
filled = rounded.fillna(method='backfill', downcast='infer') | ||
assert_frame_equal(filled['ItemA'], | ||
rounded['ItemA'].fillna(method='backfill', downcast='infer')) | ||
|
||
# Now explicitly request axis 1. | ||
filled = self.panel.fillna(method='backfill', axis=1) | ||
assert_frame_equal(filled['ItemA'], | ||
self.panel['ItemA'].fillna(method='backfill', axis=0)) | ||
|
||
# Fill along axis 2, equivalent to filling along axis 1 of each | ||
# DataFrame. | ||
filled = self.panel.fillna(method='backfill', axis=2) | ||
assert_frame_equal(filled['ItemA'], | ||
self.panel['ItemA'].fillna(method='backfill', axis=1)) | ||
|
||
# Fill an empty panel. | ||
empty = self.panel.reindex(items=[]) | ||
filled = empty.fillna(0) | ||
assert_panel_equal(filled, empty) | ||
|
||
# either method or value must be specified | ||
self.assertRaises(ValueError, self.panel.fillna) | ||
|
||
# method and value can not both be specified | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a blank line between tests. |
||
self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') | ||
|
||
# can't pass list or tuple, only scalar | ||
self.assertRaises(TypeError, self.panel.fillna, [1, 2]) | ||
self.assertRaises(TypeError, self.panel.fillna, (1, 2)) | ||
|
||
# limit not implemented when only value is specified | ||
p = Panel(np.random.randn(3,4,5)) | ||
p.iloc[0:2,0:2,0:2] = np.nan | ||
self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) | ||
self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1)) | ||
|
||
def test_fillna_axis_0(self): | ||
# GH 11445 | ||
|
||
# Forward fill along axis 0, interpolating values across DataFrames. | ||
filled = self.panel.fillna(method='ffill', axis=0) | ||
nan_indexes = self.panel.loc['ItemB', :, 'C'].index[ | ||
self.panel.loc['ItemB', :, 'C'].apply(np.isnan)] | ||
|
||
# Values from ItemA are filled into ItemB. | ||
assert_series_equal(filled.loc['ItemB', :, 'C'][nan_indexes], | ||
self.panel.loc['ItemA', :, 'C'][nan_indexes]) | ||
|
||
# Backfill along axis 0. | ||
filled = self.panel.fillna(method='backfill', axis=0) | ||
|
||
# The test data lacks values that can be backfilled on axis 0. | ||
assert_panel_equal(filled, self.panel) | ||
|
||
# Reverse the panel and backfill along axis 0, to properly test | ||
# backfill. | ||
reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0])) | ||
filled = reverse_panel.fillna(method='bfill', axis=0) | ||
nan_indexes = reverse_panel.loc['ItemB', :, 'C'].index[ | ||
reverse_panel.loc['ItemB', :, 'C'].isnull()] | ||
assert_series_equal(filled.loc['ItemB', :, 'C'][nan_indexes], | ||
reverse_panel.loc['ItemA', :, 'C'][nan_indexes]) | ||
|
||
# Fill along axis 0 with limit. | ||
filled = self.panel.fillna(method='ffill', axis=0, limit=1) | ||
a_nan = self.panel.loc['ItemA', :, 'C'].index[ | ||
self.panel.loc['ItemA', :, 'C'].apply(np.isnan)] | ||
b_nan = self.panel.loc['ItemB', :, 'C'].index[ | ||
self.panel.loc['ItemB', :, 'C'].apply(np.isnan)] | ||
|
||
# Cells that are nan in ItemB but not in ItemA remain unfilled in | ||
# ItemC. | ||
self.assertTrue( | ||
filled.loc['ItemC', :, 'C'][b_nan.difference(a_nan)].apply(np.isnan).all()) | ||
|
||
# limit not implemented when only value is specified | ||
panel = self.panel.copy() | ||
panel['str'] = 'foo' | ||
self.assertRaises(NotImplementedError, | ||
lambda: panel.fillna(method='ffill', axis=0)) | ||
|
||
def test_ffill_bfill(self): | ||
assert_panel_equal(self.panel.ffill(), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
need to document this 'change' in the doc-string