Skip to content

BUG: Panel.fillna with method='ffill' ignores the axis parameter (GH8251) #8401

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 70 additions & 27 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2220,7 +2220,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,
#----------------------------------------------------------------------
# Filling NA's

def fillna(self, value=None, method=None, axis=0, inplace=False,
def fillna(self, value=None, method=None, axis=None, inplace=False,
limit=None, downcast=None):
"""
Fill NA/NaN values using the specified method
Expand All @@ -2236,9 +2236,11 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
values specifying which value to use for each index (for a Series) or
column (for a DataFrame). (values not in the dict/Series/DataFrame will not be
filled). This value cannot be a list.
axis : {0, 1}, default 0
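axis : {0, 1, 2}, default None
Fill along this axis. For a DataFrame:
* 0: fill column-by-column
* 1: fill row-by-row
For a Panel, axis 0 fills across items, while axes 1 and 2 fill each
item's DataFrame column-by-column and row-by-row respectively.
Default (None): fill column-by-column (0 for Series and DataFrame, 1 for Panel)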
inplace : boolean, default False
If True, fill in place. Note: this will modify any
other views on this object, (e.g. a no-copy slice for a column in a
Expand All @@ -2263,45 +2265,76 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
'you passed a "{0}"'.format(type(value).__name__))
self._consolidate_inplace()

if axis is None:
axis = max(0, self.ndim - 2)

axis = self._get_axis_number(axis)
method = com._clean_fill_method(method)

from pandas import DataFrame
if value is None:
if method is None:
raise ValueError('must specify a fill method or value')
if self._is_mixed_type and axis == 1:

# 2d or less
if self.ndim <= 2:
if self._is_mixed_type and axis == 1:
if inplace:
raise NotImplementedError(
'Cannot fill mixed type on axis 1 in place')
if downcast:
raise NotImplementedError(
'Cannot fill mixed type on axis 1 with downcast')

result = self.T.fillna(method=method, limit=limit).T

# need to downcast here because of all of the transposes
result._data = result._data.downcast()

return result

method = com._clean_fill_method(method)
new_data = self._data.interpolate(method=method,
axis=axis,
limit=limit,
inplace=inplace,
coerce=True,
downcast=downcast)

# 3d
elif self.ndim == 3:
if inplace:
raise NotImplementedError()
result = self.T.fillna(method=method, limit=limit).T
raise NotImplementedError('Cannot fill Panel in place')

# need to downcast here because of all of the transposes
result._data = result._data.downcast()
if axis == 0:
if downcast:
raise NotImplementedError(
'Cannot fill Panel on axis 0 with downcast')

return result
swapped = self.swapaxes(0, 1, copy=False)
filled = swapped.fillna(method=method, axis=1, limit=limit)
result = filled.swapaxes(0, 1, copy=False)

# need to downcast here because of all of the transposes
result._data = result._data.downcast()

return result

else:
# fill in 2d chunks
result = dict([(col, s.fillna(method=method,
axis=axis - 1,
limit=limit,
downcast=downcast))
for col, s in compat.iteritems(self)])
return self._constructor.from_dict(result).__finalize__(self)

# > 3d
if self.ndim > 3:
elif self.ndim > 3:
raise NotImplementedError(
'Cannot fillna with a method for > 3dims'
)

# 3d
elif self.ndim == 3:

# fill in 2d chunks
result = dict([(col, s.fillna(method=method, value=value))
for col, s in compat.iteritems(self)])
return self._constructor.from_dict(result).__finalize__(self)

# 2d or less
method = com._clean_fill_method(method)
new_data = self._data.interpolate(method=method,
axis=axis,
limit=limit,
inplace=inplace,
coerce=True,
downcast=downcast)
else:
if method is not None:
raise ValueError('cannot specify both a fill method and value')
Expand All @@ -2324,23 +2357,33 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
downcast=downcast)

elif isinstance(value, (dict, com.ABCSeries)):
if self.ndim >= 3:
raise NotImplementedError('Cannot fillna with a dict/Series '
'for >= 3 dims')

if axis == 1:
raise NotImplementedError('Currently only can fill '
'with dict/Series column '
'by column')

if downcast:
raise NotImplementedError(
'Cannot downcast with dict/Series')

result = self if inplace else self.copy()
for k, v in compat.iteritems(value):
if k not in result:
continue
obj = result[k]
obj.fillna(v, limit=limit, inplace=True)
return result

elif not com.is_list_like(value):
new_data = self._data.fillna(value=value,
limit=limit,
inplace=inplace,
downcast=downcast)

elif isinstance(value, DataFrame) and self.ndim == 2:
new_data = self.where(self.notnull(), value)
else:
Expand All @@ -2351,12 +2394,12 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
else:
return self._constructor(new_data).__finalize__(self)

def ffill(self, axis=0, inplace=False, limit=None, downcast=None):
def ffill(self, axis=None, inplace=False, limit=None, downcast=None):
"Synonym for NDFrame.fillna(method='ffill')"
return self.fillna(method='ffill', axis=axis, inplace=inplace,
limit=limit, downcast=downcast)

def bfill(self, axis=0, inplace=False, limit=None, downcast=None):
def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
"Synonym for NDFrame.fillna(method='bfill')"
return self.fillna(method='bfill', axis=axis, inplace=inplace,
limit=limit, downcast=downcast)
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7628,9 +7628,11 @@ def test_fillna_dict_series(self):
expected = df.fillna(df.max().to_dict())
assert_frame_equal(result, expected)

# disable this for now
# disable these for now
with assertRaisesRegexp(NotImplementedError, 'column by column'):
df.fillna(df.max(1), axis=1)
with assertRaisesRegexp(NotImplementedError, 'downcast'):
df.fillna(df.max(1), downcast='infer')

def test_fillna_dataframe(self):
# GH 8377
Expand Down Expand Up @@ -7668,6 +7670,11 @@ def test_fillna_columns(self):
expected = df.astype(float).fillna(method='ffill', axis=1)
assert_frame_equal(result, expected)

# disable these for now
with assertRaisesRegexp(NotImplementedError, 'axis 1.*in place'):
df.fillna(method='ffill', axis=1, inplace=True)
with assertRaisesRegexp(NotImplementedError, 'axis 1.*downcast'):
df.fillna(method='ffill', axis=1, downcast='infer')

def test_fillna_invalid_method(self):
with assertRaisesRegexp(ValueError, 'ffil'):
Expand Down
77 changes: 77 additions & 0 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,9 +1336,12 @@ def test_sort_index(self):
assert_panel_equal(sorted_panel, self.panel)

def test_fillna(self):
# Fill with a value.
filled = self.panel.fillna(0)
self.assertTrue(np.isfinite(filled.values).all())

# If no axis is specified, fill along axis 1, equivalent to axis 0 of
# each DataFrame.
filled = self.panel.fillna(method='backfill')
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill'))
Expand All @@ -1350,10 +1353,72 @@ def test_fillna(self):
assert_frame_equal(filled['ItemA'],
panel['ItemA'].fillna(method='backfill'))

# Fill forward.
filled = self.panel.fillna(method='ffill')
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='ffill'))

# With limit.
filled = self.panel.fillna(method='backfill', limit=1)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', limit=1))

# With downcast.
rounded = self.panel.apply(lambda x: x.apply(np.round))
filled = rounded.fillna(method='backfill', downcast='infer')
assert_frame_equal(filled['ItemA'],
rounded['ItemA'].fillna(method='backfill', downcast='infer'))

# Now explicitly request axis 1.
filled = self.panel.fillna(method='backfill', axis=1)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', axis=0))

# Fill along axis 2, equivalent to filling along axis 1 of each
# DataFrame.
filled = self.panel.fillna(method='backfill', axis=2)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', axis=1))

# Fill an empty panel.
empty = self.panel.reindex(items=[])
filled = empty.fillna(0)
assert_panel_equal(filled, empty)

def test_fillna_axis_0(self):
    """Method-fill a Panel along axis 0, i.e. across its items.

    Exercises forward fill, backward fill (on a panel whose items are
    reversed, since the fixture has nothing to backfill in its natural
    order) and forward fill with ``limit=1``.
    """
    def nan_labels(series):
        # Index labels at which ``series`` holds NaN.
        return series.index[series.apply(np.isnan)]

    # Forward fill along axis 0, carrying values from one item's
    # DataFrame into the next.
    filled = self.panel.fillna(method='ffill', axis=0)
    nan_indexes = nan_labels(self.panel['ItemB']['C'])
    # Values from ItemA are filled into ItemB.
    assert_series_equal(filled['ItemB']['C'][nan_indexes],
                        self.panel['ItemA']['C'][nan_indexes])

    # Backfill along axis 0.
    filled = self.panel.fillna(method='backfill', axis=0)
    # The test data lacks values that can be backfilled on axis 0.
    assert_panel_equal(filled, self.panel)
    # Reverse the panel and backfill along axis 0, to properly test
    # backfill.
    reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0]))
    filled = reverse_panel.fillna(method='bfill', axis=0)
    nan_indexes = nan_labels(reverse_panel['ItemB']['C'])
    assert_series_equal(filled['ItemB']['C'][nan_indexes],
                        reverse_panel['ItemA']['C'][nan_indexes])

    # Fill along axis 0 with limit.
    filled = self.panel.fillna(method='ffill', axis=0, limit=1)
    a_nan = nan_labels(self.panel['ItemA']['C'])
    b_nan = nan_labels(self.panel['ItemB']['C'])
    # Cells that are nan in ItemB but not in ItemA remain unfilled in
    # ItemC.
    # NOTE(review): presumably ItemC is also NaN at those labels in the
    # fixture, so it would need a second consecutive fill that limit=1
    # forbids -- confirm against the fixture.
    # ``Index.difference`` is the set-difference spelling; ``Index.diff``
    # was deprecated and has a different meaning in newer pandas.
    only_b_nan = b_nan.difference(a_nan)
    self.assertTrue(
        filled['ItemC']['C'][only_b_nan].apply(np.isnan).all())

def test_fillna_error(self):
self.assertRaises(ValueError, self.panel.fillna)
self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill')

Expand All @@ -1365,6 +1430,18 @@ def test_fillna(self):
p.iloc[0:2,0:2,0:2] = np.nan
self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1))

# Method fill is not implemented with inplace=True.
self.assertRaises(NotImplementedError,
lambda: self.panel.fillna(method='bfill', inplace=True))

# Method fill is not implemented with downcast on axis=0.
self.assertRaises(NotImplementedError,
lambda: self.panel.fillna(method='bfill', axis=0, downcast='infer'))

# Value fill is not implemented with dict.
self.assertRaises(NotImplementedError,
lambda: self.panel.fillna(value={'a': 'b'}))

def test_ffill_bfill(self):
assert_panel_equal(self.panel.ffill(),
self.panel.fillna(method='ffill'))
Expand Down