Skip to content

BUG: fix Panel.fillna() ignoring axis parameter (re-submission) #11445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas.core.algorithms import factorize
from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin
import pandas.core.common as com
from pandas.core.missing import interpolate_2d
from pandas.core.missing import pad
from pandas.util.decorators import cache_readonly, deprecate_kwarg

from pandas.core.common import (ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex,
Expand Down Expand Up @@ -1340,8 +1340,7 @@ def fillna(self, value=None, method=None, limit=None):
if method is not None:

values = self.to_dense().reshape(-1, len(self))
values = interpolate_2d(
values, method, 0, None, value).astype(self.categories.dtype)[0]
values = pad(values, method, 0, None, value).astype(self.categories.dtype)[0]
values = _get_codes_for_values(values, self.categories)

else:
Expand Down
46 changes: 17 additions & 29 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pandas.tseries.period import PeriodIndex
from pandas.core.internals import BlockManager
import pandas.core.common as com
import pandas.core.missing as mis
import pandas.core.missing as missing
import pandas.core.datetools as datetools
from pandas import compat
from pandas.compat import map, zip, lrange, string_types, isidentifier
Expand Down Expand Up @@ -51,7 +51,7 @@ def _single_replace(self, to_replace, method, inplace, limit):

orig_dtype = self.dtype
result = self if inplace else self.copy()
fill_f = mis._get_fill_func(method)
fill_f = missing._get_fill_func(method)

mask = com.mask_missing(result.values, to_replace)
values = fill_f(result.values, limit=limit, mask=mask)
Expand Down Expand Up @@ -1929,7 +1929,7 @@ def reindex(self, *args, **kwargs):

# construct the args
axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
method = mis._clean_reindex_fill_method(kwargs.pop('method', None))
method = missing._clean_reindex_fill_method(kwargs.pop('method', None))
level = kwargs.pop('level', None)
copy = kwargs.pop('copy', True)
limit = kwargs.pop('limit', None)
Expand Down Expand Up @@ -2042,7 +2042,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,

axis_name = self._get_axis_name(axis)
axis_values = self._get_axis(axis_name)
method = mis._clean_reindex_fill_method(method)
method = missing._clean_reindex_fill_method(method)
new_index, indexer = axis_values.reindex(labels, method, level,
limit=limit)
return self._reindex_with_indexers(
Expand Down Expand Up @@ -2774,40 +2774,28 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
# set the default here, so functions examining the signaure
# can detect if something was set (e.g. in groupby) (GH9221)
if axis is None:
axis = 0
axis = self._stat_axis_name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to document this 'change' in the doc-string

axis = self._get_axis_number(axis)
method = mis._clean_fill_method(method)
method = missing._clean_fill_method(method)

from pandas import DataFrame
if value is None:
if method is None:
raise ValueError('must specify a fill method or value')
if self._is_mixed_type and axis == 1:
if self._is_mixed_type:
if (self.ndim > 2) and (axis == 0):
raise NotImplementedError('cannot fill across axis 0 for mixed dtypes')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

too many if statements here, this should be a bit more general I think

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way filling across mixed-dtype blocks for 2D frames is done is by transposing axes. This is actually what I proposed doing in my original PR from last year to extend filling across higher dimensionality frames, but you rejected that idea. I could fill 2D frames using the same general method I'm proposing here, but that will result in in some backwards incompatible behavior and users that depend on being able to fill across mixed-type blocks for 2D would no longer be able to do so.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't recall that. my point is that this should be much simpler logic. Too many if statements here (and in the internals).

if inplace:
raise NotImplementedError()
result = self.T.fillna(method=method, limit=limit).T

# need to downcast here because of all of the transposes
result._data = result._data.downcast()

return result

# > 3d
if self.ndim > 3:
raise NotImplementedError(
'Cannot fillna with a method for > 3dims'
)
raise NotImplementedError('cannot fill inplace for mixed dtypes')
elif (self.ndim == 2) and (axis == 1):
result = self.T.fillna(method=method, limit=limit).T

# 3d
elif self.ndim == 3:
# need to downcast here because of all of the transposes
result._data = result._data.downcast()

# fill in 2d chunks
result = dict([(col, s.fillna(method=method, value=value))
for col, s in compat.iteritems(self)])
return self._constructor.from_dict(result).__finalize__(self)
return result

# 2d or less
method = mis._clean_fill_method(method)
method = missing._clean_fill_method(method)
new_data = self._data.interpolate(method=method,
axis=axis,
limit=limit,
Expand Down Expand Up @@ -3750,7 +3738,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
fill_value=None, method=None, limit=None, fill_axis=0,
broadcast_axis=None):
from pandas import DataFrame, Series
method = mis._clean_fill_method(method)
method = missing._clean_fill_method(method)

if broadcast_axis == 1 and self.ndim != other.ndim:
if isinstance(self, Series):
Expand Down
28 changes: 14 additions & 14 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pandas.core.categorical import Categorical, maybe_to_categorical
from pandas.tseries.index import DatetimeIndex
import pandas.core.common as com
import pandas.core.missing as mis
import pandas.core.missing as missing
import pandas.core.convert as convert
from pandas.sparse.array import _maybe_to_sparse, SparseArray
import pandas.lib as lib
Expand Down Expand Up @@ -853,7 +853,7 @@ def check_int_bool(self, inplace):

# a fill na type method
try:
m = mis._clean_fill_method(method)
m = missing._clean_fill_method(method)
except:
m = None

Expand All @@ -871,7 +871,7 @@ def check_int_bool(self, inplace):
mgr=mgr)
# try an interp method
try:
m = mis._clean_interp_method(method, **kwargs)
m = missing._clean_interp_method(method, **kwargs)
except:
m = None

Expand Down Expand Up @@ -910,12 +910,12 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
values = self.values if inplace else self.values.copy()
values, _, fill_value, _ = self._try_coerce_args(values, fill_value)
values = self._try_operate(values)
values = mis.interpolate_2d(values,
method=method,
axis=axis,
limit=limit,
fill_value=fill_value,
dtype=self.dtype)
values = missing.pad(values,
method=method,
axis=axis,
limit=limit,
fill_value=fill_value,
dtype=self.dtype)
values = self._try_coerce_result(values)

blocks = [self.make_block(values,
Expand Down Expand Up @@ -950,8 +950,8 @@ def func(x):

# process a 1-d slice, returning it
# should the axis argument be handled below in apply_along_axis?
# i.e. not an arg to mis.interpolate_1d
return mis.interpolate_1d(index, x, method=method, limit=limit,
# i.e. not an arg to missing.interpolate
return missing.interpolate(index, x, method=method, limit=limit,
limit_direction=limit_direction,
fill_value=fill_value,
bounds_error=False, **kwargs)
Expand Down Expand Up @@ -2358,7 +2358,7 @@ def make_block_same_class(self, values, placement,
def interpolate(self, method='pad', axis=0, inplace=False,
limit=None, fill_value=None, **kwargs):

values = mis.interpolate_2d(
values = missing.pad(
self.values.to_dense(), method, axis, limit, fill_value)
return self.make_block_same_class(values=values,
placement=self.mgr_locs)
Expand Down Expand Up @@ -3774,8 +3774,8 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None,

# fill if needed
if method is not None or limit is not None:
new_values = mis.interpolate_2d(new_values, method=method,
limit=limit, fill_value=fill_value)
new_values = missing.pad(new_values, method=method,
limit=limit, fill_value=fill_value)

if self._block.is_sparse:
make_block = self._block.make_block_same_class
Expand Down
40 changes: 31 additions & 9 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ def _clean_interp_method(method, **kwargs):
return method


def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
limit_direction='forward',
fill_value=None, bounds_error=False, order=None, **kwargs):
def interpolate(xvalues, yvalues, method='linear', limit=None,
limit_direction='forward',
fill_value=None, bounds_error=False, order=None, **kwargs):
"""
Logic for the 1-d interpolation. The result should be 1-d, inputs
xvalues and yvalues will each be 1-d arrays of the same length.
Expand Down Expand Up @@ -219,20 +219,42 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
return new_y


def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None):
""" perform an actual interpolation of values, values will be make 2-d if
needed fills inplace, returns the result
def pad(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None):
"""
Perform an actual interpolation of values. 1-d values will be made 2-d temporarily.
Returns the result
"""

transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
ndim = values.ndim

# reshape a 1 dim if needed
ndim = values.ndim
if values.ndim == 1:
if ndim == 1:
if axis != 0: # pragma: no cover
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be handled higher up

raise AssertionError("cannot interpolate on a ndim == 1 with "
"axis != 0")
values = values.reshape(tuple((1,) + values.shape))
# recursively slice n-dimension frames (n>2) into (n-1)-dimension frames
elif ndim > 2:
slice_axis = 1 if axis == 0 else 0
slicer = [slice(None)]*ndim

if ndim == 3:
axis = 0 if (axis > 1) else 1
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are changing the meaning of axis here. it is exactly [slice(None)]*ndim indexed by axis for all types.
this is equivalent to .loc[.....] and NOT repeated sectioning. I know that may be confusing, but I said that for your tests are doing incorrect indexing. pls make the changes.

axis = axis - 1 if (axis > 0) else 0

for n in range(values.shape[slice_axis]):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comments as before

slicer[slice_axis] = n
values[slicer] = pad(values[slicer],
method=method,
axis=axis,
limit=limit,
fill_value=fill_value,
dtype=dtype)

return values

transf = (lambda x: x) if axis == 0 else (lambda x: x.T)

if fill_value is None:
mask = None
Expand Down
80 changes: 79 additions & 1 deletion pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1454,20 +1454,98 @@ def test_fillna(self):
assert_frame_equal(filled['ItemA'],
panel['ItemA'].fillna(method='backfill'))

# GH 11445
# Fill forward.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number here as a comment.

filled = self.panel.fillna(method='ffill')
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='ffill'))

# With limit.
filled = self.panel.fillna(method='backfill', limit=1)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', limit=1))

# With downcast.
rounded = self.panel.apply(lambda x: x.apply(np.round))
filled = rounded.fillna(method='backfill', downcast='infer')
assert_frame_equal(filled['ItemA'],
rounded['ItemA'].fillna(method='backfill', downcast='infer'))

# Now explicitly request axis 1.
filled = self.panel.fillna(method='backfill', axis=1)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', axis=0))

# Fill along axis 2, equivalent to filling along axis 1 of each
# DataFrame.
filled = self.panel.fillna(method='backfill', axis=2)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', axis=1))

# Fill an empty panel.
empty = self.panel.reindex(items=[])
filled = empty.fillna(0)
assert_panel_equal(filled, empty)

# either method or value must be specified
self.assertRaises(ValueError, self.panel.fillna)

# method and value can not both be specified
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a blank line between tests.

self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill')

# can't pass list or tuple, only scalar
self.assertRaises(TypeError, self.panel.fillna, [1, 2])
self.assertRaises(TypeError, self.panel.fillna, (1, 2))

# limit not implemented when only value is specified
p = Panel(np.random.randn(3,4,5))
p.iloc[0:2,0:2,0:2] = np.nan
self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1))
self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1))

def test_fillna_axis_0(self):
# GH 11445

# Forward fill along axis 0, interpolating values across DataFrames.
filled = self.panel.fillna(method='ffill', axis=0)
nan_indexes = self.panel.loc['ItemB', :, 'C'].index[
self.panel.loc['ItemB', :, 'C'].apply(np.isnan)]

# Values from ItemA are filled into ItemB.
assert_series_equal(filled.loc['ItemB', :, 'C'][nan_indexes],
self.panel.loc['ItemA', :, 'C'][nan_indexes])

# Backfill along axis 0.
filled = self.panel.fillna(method='backfill', axis=0)

# The test data lacks values that can be backfilled on axis 0.
assert_panel_equal(filled, self.panel)

# Reverse the panel and backfill along axis 0, to properly test
# backfill.
reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0]))
filled = reverse_panel.fillna(method='bfill', axis=0)
nan_indexes = reverse_panel.loc['ItemB', :, 'C'].index[
reverse_panel.loc['ItemB', :, 'C'].isnull()]
assert_series_equal(filled.loc['ItemB', :, 'C'][nan_indexes],
reverse_panel.loc['ItemA', :, 'C'][nan_indexes])

# Fill along axis 0 with limit.
filled = self.panel.fillna(method='ffill', axis=0, limit=1)
a_nan = self.panel.loc['ItemA', :, 'C'].index[
self.panel.loc['ItemA', :, 'C'].apply(np.isnan)]
b_nan = self.panel.loc['ItemB', :, 'C'].index[
self.panel.loc['ItemB', :, 'C'].apply(np.isnan)]

# Cells that are nan in ItemB but not in ItemA remain unfilled in
# ItemC.
self.assertTrue(
filled.loc['ItemC', :, 'C'][b_nan.difference(a_nan)].apply(np.isnan).all())

# limit not implemented when only value is specified
panel = self.panel.copy()
panel['str'] = 'foo'
self.assertRaises(NotImplementedError,
lambda: panel.fillna(method='ffill', axis=0))

def test_ffill_bfill(self):
assert_panel_equal(self.panel.ffill(),
Expand Down
Loading