Skip to content

Commit 8048c0f

Browse files
committed
BUG: fix Panel.fillna() ignoring axis parameter
1 parent 7d2022a commit 8048c0f

File tree

6 files changed

+239
-57
lines changed

6 files changed

+239
-57
lines changed

pandas/core/categorical.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas.core.algorithms import factorize
1111
from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin
1212
import pandas.core.common as com
13-
from pandas.core.missing import interpolate_2d
13+
from pandas.core.missing import pad
1414
from pandas.util.decorators import cache_readonly, deprecate_kwarg
1515

1616
from pandas.core.common import (ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex,
@@ -1340,8 +1340,7 @@ def fillna(self, value=None, method=None, limit=None):
13401340
if method is not None:
13411341

13421342
values = self.to_dense().reshape(-1, len(self))
1343-
values = interpolate_2d(
1344-
values, method, 0, None, value).astype(self.categories.dtype)[0]
1343+
values = pad(values, method, 0, None, value).astype(self.categories.dtype)[0]
13451344
values = _get_codes_for_values(values, self.categories)
13461345

13471346
else:

pandas/core/generic.py

+17-29
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from pandas.tseries.period import PeriodIndex
1717
from pandas.core.internals import BlockManager
1818
import pandas.core.common as com
19-
import pandas.core.missing as mis
19+
import pandas.core.missing as missing
2020
import pandas.core.datetools as datetools
2121
from pandas import compat
2222
from pandas.compat import map, zip, lrange, string_types, isidentifier
@@ -51,7 +51,7 @@ def _single_replace(self, to_replace, method, inplace, limit):
5151

5252
orig_dtype = self.dtype
5353
result = self if inplace else self.copy()
54-
fill_f = mis._get_fill_func(method)
54+
fill_f = missing._get_fill_func(method)
5555

5656
mask = com.mask_missing(result.values, to_replace)
5757
values = fill_f(result.values, limit=limit, mask=mask)
@@ -1929,7 +1929,7 @@ def reindex(self, *args, **kwargs):
19291929

19301930
# construct the args
19311931
axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
1932-
method = mis._clean_reindex_fill_method(kwargs.pop('method', None))
1932+
method = missing._clean_reindex_fill_method(kwargs.pop('method', None))
19331933
level = kwargs.pop('level', None)
19341934
copy = kwargs.pop('copy', True)
19351935
limit = kwargs.pop('limit', None)
@@ -2042,7 +2042,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
20422042

20432043
axis_name = self._get_axis_name(axis)
20442044
axis_values = self._get_axis(axis_name)
2045-
method = mis._clean_reindex_fill_method(method)
2045+
method = missing._clean_reindex_fill_method(method)
20462046
new_index, indexer = axis_values.reindex(labels, method, level,
20472047
limit=limit)
20482048
return self._reindex_with_indexers(
@@ -2774,40 +2774,28 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
27742774
# set the default here, so functions examining the signature
27752775
# can detect if something was set (e.g. in groupby) (GH9221)
27762776
if axis is None:
2777-
axis = 0
2777+
axis = self._stat_axis_name
27782778
axis = self._get_axis_number(axis)
2779-
method = mis._clean_fill_method(method)
2779+
method = missing._clean_fill_method(method)
27802780

27812781
from pandas import DataFrame
27822782
if value is None:
27832783
if method is None:
27842784
raise ValueError('must specify a fill method or value')
2785-
if self._is_mixed_type and axis == 1:
2785+
if self._is_mixed_type:
2786+
if (self.ndim > 2) and (axis == 0):
2787+
raise NotImplementedError('cannot fill across axis 0 for mixed dtypes')
27862788
if inplace:
2787-
raise NotImplementedError()
2788-
result = self.T.fillna(method=method, limit=limit).T
2789-
2790-
# need to downcast here because of all of the transposes
2791-
result._data = result._data.downcast()
2792-
2793-
return result
2794-
2795-
# > 3d
2796-
if self.ndim > 3:
2797-
raise NotImplementedError(
2798-
'Cannot fillna with a method for > 3dims'
2799-
)
2789+
raise NotImplementedError('cannot fill inplace for mixed dtypes')
2790+
elif (self.ndim == 2) and (axis == 1):
2791+
result = self.T.fillna(method=method, limit=limit).T
28002792

2801-
# 3d
2802-
elif self.ndim == 3:
2793+
# need to downcast here because of all of the transposes
2794+
result._data = result._data.downcast()
28032795

2804-
# fill in 2d chunks
2805-
result = dict([(col, s.fillna(method=method, value=value))
2806-
for col, s in compat.iteritems(self)])
2807-
return self._constructor.from_dict(result).__finalize__(self)
2796+
return result
28082797

2809-
# 2d or less
2810-
method = mis._clean_fill_method(method)
2798+
method = missing._clean_fill_method(method)
28112799
new_data = self._data.interpolate(method=method,
28122800
axis=axis,
28132801
limit=limit,
@@ -3750,7 +3738,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
37503738
fill_value=None, method=None, limit=None, fill_axis=0,
37513739
broadcast_axis=None):
37523740
from pandas import DataFrame, Series
3753-
method = mis._clean_fill_method(method)
3741+
method = missing._clean_fill_method(method)
37543742

37553743
if broadcast_axis == 1 and self.ndim != other.ndim:
37563744
if isinstance(self, Series):

pandas/core/internals.py

+14-14
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from pandas.core.categorical import Categorical, maybe_to_categorical
2626
from pandas.tseries.index import DatetimeIndex
2727
import pandas.core.common as com
28-
import pandas.core.missing as mis
28+
import pandas.core.missing as missing
2929
import pandas.core.convert as convert
3030
from pandas.sparse.array import _maybe_to_sparse, SparseArray
3131
import pandas.lib as lib
@@ -853,7 +853,7 @@ def check_int_bool(self, inplace):
853853

854854
# a fill na type method
855855
try:
856-
m = mis._clean_fill_method(method)
856+
m = missing._clean_fill_method(method)
857857
except:
858858
m = None
859859

@@ -871,7 +871,7 @@ def check_int_bool(self, inplace):
871871
mgr=mgr)
872872
# try an interp method
873873
try:
874-
m = mis._clean_interp_method(method, **kwargs)
874+
m = missing._clean_interp_method(method, **kwargs)
875875
except:
876876
m = None
877877

@@ -910,12 +910,12 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
910910
values = self.values if inplace else self.values.copy()
911911
values, _, fill_value, _ = self._try_coerce_args(values, fill_value)
912912
values = self._try_operate(values)
913-
values = mis.interpolate_2d(values,
914-
method=method,
915-
axis=axis,
916-
limit=limit,
917-
fill_value=fill_value,
918-
dtype=self.dtype)
913+
values = missing.pad(values,
914+
method=method,
915+
axis=axis,
916+
limit=limit,
917+
fill_value=fill_value,
918+
dtype=self.dtype)
919919
values = self._try_coerce_result(values)
920920

921921
blocks = [self.make_block(values,
@@ -950,8 +950,8 @@ def func(x):
950950

951951
# process a 1-d slice, returning it
952952
# should the axis argument be handled below in apply_along_axis?
953-
# i.e. not an arg to mis.interpolate_1d
954-
return mis.interpolate_1d(index, x, method=method, limit=limit,
953+
# i.e. not an arg to missing.interpolate
954+
return missing.interpolate(index, x, method=method, limit=limit,
955955
limit_direction=limit_direction,
956956
fill_value=fill_value,
957957
bounds_error=False, **kwargs)
@@ -2358,7 +2358,7 @@ def make_block_same_class(self, values, placement,
23582358
def interpolate(self, method='pad', axis=0, inplace=False,
23592359
limit=None, fill_value=None, **kwargs):
23602360

2361-
values = mis.interpolate_2d(
2361+
values = missing.pad(
23622362
self.values.to_dense(), method, axis, limit, fill_value)
23632363
return self.make_block_same_class(values=values,
23642364
placement=self.mgr_locs)
@@ -3774,8 +3774,8 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None,
37743774

37753775
# fill if needed
37763776
if method is not None or limit is not None:
3777-
new_values = mis.interpolate_2d(new_values, method=method,
3778-
limit=limit, fill_value=fill_value)
3777+
new_values = missing.pad(new_values, method=method,
3778+
limit=limit, fill_value=fill_value)
37793779

37803780
if self._block.is_sparse:
37813781
make_block = self._block.make_block_same_class

pandas/core/missing.py

+31-9
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ def _clean_interp_method(method, **kwargs):
4949
return method
5050

5151

52-
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
53-
limit_direction='forward',
54-
fill_value=None, bounds_error=False, order=None, **kwargs):
52+
def interpolate(xvalues, yvalues, method='linear', limit=None,
53+
limit_direction='forward',
54+
fill_value=None, bounds_error=False, order=None, **kwargs):
5555
"""
5656
Logic for the 1-d interpolation. The result should be 1-d, inputs
5757
xvalues and yvalues will each be 1-d arrays of the same length.
@@ -219,20 +219,42 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
219219
return new_y
220220

221221

222-
def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None):
223-
""" perform an actual interpolation of values, values will be make 2-d if
224-
needed fills inplace, returns the result
222+
def pad(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None):
223+
"""
224+
Perform an actual interpolation of values. 1-d values will be made 2-d temporarily.
225+
Returns the result
225226
"""
226227

227-
transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
228+
ndim = values.ndim
228229

229230
# reshape a 1 dim if needed
230-
ndim = values.ndim
231-
if values.ndim == 1:
231+
if ndim == 1:
232232
if axis != 0: # pragma: no cover
233233
raise AssertionError("cannot interpolate on a ndim == 1 with "
234234
"axis != 0")
235235
values = values.reshape(tuple((1,) + values.shape))
236+
# recursively slice n-dimension frames (n>2) into (n-1)-dimension frames
237+
elif ndim > 2:
238+
slice_axis = 1 if axis == 0 else 0
239+
slicer = [slice(None)]*ndim
240+
241+
if ndim == 3:
242+
axis = 0 if (axis > 1) else 1
243+
else:
244+
axis = axis - 1 if (axis > 0) else 0
245+
246+
for n in range(values.shape[slice_axis]):
247+
slicer[slice_axis] = n
248+
values[slicer] = pad(values[slicer],
249+
method=method,
250+
axis=axis,
251+
limit=limit,
252+
fill_value=fill_value,
253+
dtype=dtype)
254+
255+
return values
256+
257+
transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
236258

237259
if fill_value is None:
238260
mask = None

pandas/tests/test_panel.py

+79-1
Original file line numberDiff line numberDiff line change
@@ -1454,20 +1454,98 @@ def test_fillna(self):
14541454
assert_frame_equal(filled['ItemA'],
14551455
panel['ItemA'].fillna(method='backfill'))
14561456

1457+
# GH 11445
1458+
# Fill forward.
1459+
filled = self.panel.fillna(method='ffill')
1460+
assert_frame_equal(filled['ItemA'],
1461+
self.panel['ItemA'].fillna(method='ffill'))
1462+
1463+
# With limit.
1464+
filled = self.panel.fillna(method='backfill', limit=1)
1465+
assert_frame_equal(filled['ItemA'],
1466+
self.panel['ItemA'].fillna(method='backfill', limit=1))
1467+
1468+
# With downcast.
1469+
rounded = self.panel.apply(lambda x: x.apply(np.round))
1470+
filled = rounded.fillna(method='backfill', downcast='infer')
1471+
assert_frame_equal(filled['ItemA'],
1472+
rounded['ItemA'].fillna(method='backfill', downcast='infer'))
1473+
1474+
# Now explicitly request axis 1.
1475+
filled = self.panel.fillna(method='backfill', axis=1)
1476+
assert_frame_equal(filled['ItemA'],
1477+
self.panel['ItemA'].fillna(method='backfill', axis=0))
1478+
1479+
# Fill along axis 2, equivalent to filling along axis 1 of each
1480+
# DataFrame.
1481+
filled = self.panel.fillna(method='backfill', axis=2)
1482+
assert_frame_equal(filled['ItemA'],
1483+
self.panel['ItemA'].fillna(method='backfill', axis=1))
1484+
1485+
# Fill an empty panel.
14571486
empty = self.panel.reindex(items=[])
14581487
filled = empty.fillna(0)
14591488
assert_panel_equal(filled, empty)
14601489

1490+
# either method or value must be specified
14611491
self.assertRaises(ValueError, self.panel.fillna)
1492+
1493+
# method and value can not both be specified
14621494
self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill')
14631495

1496+
# can't pass list or tuple, only scalar
14641497
self.assertRaises(TypeError, self.panel.fillna, [1, 2])
14651498
self.assertRaises(TypeError, self.panel.fillna, (1, 2))
14661499

14671500
# limit not implemented when only value is specified
14681501
p = Panel(np.random.randn(3,4,5))
14691502
p.iloc[0:2,0:2,0:2] = np.nan
1470-
self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1))
1503+
self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1))
1504+
1505+
def test_fillna_axis_0(self):
1506+
# GH 11445
1507+
1508+
# Forward fill along axis 0, interpolating values across DataFrames.
1509+
filled = self.panel.fillna(method='ffill', axis=0)
1510+
nan_indexes = self.panel.loc['ItemB', :, 'C'].index[
1511+
self.panel.loc['ItemB', :, 'C'].apply(np.isnan)]
1512+
1513+
# Values from ItemA are filled into ItemB.
1514+
assert_series_equal(filled.loc['ItemB', :, 'C'][nan_indexes],
1515+
self.panel.loc['ItemA', :, 'C'][nan_indexes])
1516+
1517+
# Backfill along axis 0.
1518+
filled = self.panel.fillna(method='backfill', axis=0)
1519+
1520+
# The test data lacks values that can be backfilled on axis 0.
1521+
assert_panel_equal(filled, self.panel)
1522+
1523+
# Reverse the panel and backfill along axis 0, to properly test
1524+
# backfill.
1525+
reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0]))
1526+
filled = reverse_panel.fillna(method='bfill', axis=0)
1527+
nan_indexes = reverse_panel.loc['ItemB', :, 'C'].index[
1528+
reverse_panel.loc['ItemB', :, 'C'].isnull()]
1529+
assert_series_equal(filled.loc['ItemB', :, 'C'][nan_indexes],
1530+
reverse_panel.loc['ItemA', :, 'C'][nan_indexes])
1531+
1532+
# Fill along axis 0 with limit.
1533+
filled = self.panel.fillna(method='ffill', axis=0, limit=1)
1534+
a_nan = self.panel.loc['ItemA', :, 'C'].index[
1535+
self.panel.loc['ItemA', :, 'C'].apply(np.isnan)]
1536+
b_nan = self.panel.loc['ItemB', :, 'C'].index[
1537+
self.panel.loc['ItemB', :, 'C'].apply(np.isnan)]
1538+
1539+
# Cells that are nan in ItemB but not in ItemA remain unfilled in
1540+
# ItemC.
1541+
self.assertTrue(
1542+
filled.loc['ItemC', :, 'C'][b_nan.difference(a_nan)].apply(np.isnan).all())
1543+
1544+
# filling across axis 0 is not implemented for mixed dtypes
1545+
panel = self.panel.copy()
1546+
panel['str'] = 'foo'
1547+
self.assertRaises(NotImplementedError,
1548+
lambda: panel.fillna(method='ffill', axis=0))
14711549

14721550
def test_ffill_bfill(self):
14731551
assert_panel_equal(self.panel.ffill(),

0 commit comments

Comments
 (0)