From d9f44ed781ec806fa8844bcb45ef84ed1a5231bf Mon Sep 17 00:00:00 2001 From: jorvis Date: Fri, 13 Sep 2019 21:40:20 -0500 Subject: [PATCH 1/8] Fix for fillna ignoring axis=1 parameter (issues #17399 #17409) --- pandas/core/generic.py | 2 +- pandas/core/internals/blocks.py | 17 +++++++---------- pandas/tests/frame/test_missing.py | 8 ++++++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 68308b2f83b60..7b5feca3605d2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6274,7 +6274,7 @@ def fillna( ) new_data = self._data.fillna( - value=value, limit=limit, inplace=inplace, downcast=downcast + value=value, axis=axis, limit=limit, inplace=inplace, downcast=downcast ) elif isinstance(value, (dict, ABCSeries)): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 29d225443d18a..d1e081b8217ea 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -386,7 +386,7 @@ def apply(self, func, **kwargs): return result - def fillna(self, value, limit=None, inplace=False, downcast=None): + def fillna(self, value, axis=0, limit=None, inplace=False, downcast=None): """ fillna on the block with the value. If we fail, then convert to ObjectBlock and try again """ @@ -398,13 +398,10 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): raise ValueError("Limit must be an integer") if limit < 1: raise ValueError("Limit must be greater than 0") - mask[mask.cumsum(self.ndim - 1) > limit] = False + mask[mask.cumsum(int(axis == 0 and self.ndim > 1)) > limit] = False if not self._can_hold_na: - if inplace: - return self - else: - return self.copy() + return self if inplace else self.copy() if self._can_hold_element(value): # equivalent: _try_coerce_args(value) would not raise @@ -1842,7 +1839,7 @@ def concat_same_type(self, to_concat, placement=None): placement = placement or slice(0, len(values), 1) return self.make_block_same_class(values, ndim=self.ndim, placement=placement) - def fillna(self, value, limit=None, inplace=False, downcast=None): + def fillna(self, value, axis=0, limit=None, inplace=False, downcast=None): values = self.values if inplace else self.values.copy() values = values.fillna(value=value, limit=limit) return [ @@ -2406,15 +2403,15 @@ def concat_same_type(self, to_concat, placement=None): return ObjectBlock(values, ndim=self.ndim, placement=placement) return super().concat_same_type(to_concat, placement) - def fillna(self, value, limit=None, inplace=False, downcast=None): + def fillna(self, value, axis=0, limit=None, inplace=False, downcast=None): # We support filling a DatetimeTZ with a `value` whose timezone # is different by coercing to object. if self._can_hold_element(value): - return super().fillna(value, limit, inplace, downcast) + return super().fillna(value, axis, limit, inplace, downcast) # different timezones, or a non-tz return self.astype(object).fillna( - value, limit=limit, inplace=inplace, downcast=downcast + value, axis=axis, limit=limit, inplace=inplace, downcast=downcast ) def setitem(self, indexer, value): diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 94667ecfa837d..5469de28f9019 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -671,6 +671,14 @@ def test_fillna_columns(self): expected = df.astype(float).fillna(method="ffill", axis=1) assert_frame_equal(result, expected) + def test_fillna_columns(self): + df = DataFrame(np.random.randn(10, 4)) + df.iloc[1:4, 1:4] = nan + expected = df.copy() + expected.iloc[1:4, 1:4] = 0 + result = df.fillna(value=0, axis=1) + tm.assert_frame_equal(result, expected) + def test_fillna_invalid_method(self, float_frame): with pytest.raises(ValueError, match="ffil"): float_frame.fillna(method="ffil") From 0ed4f1591d4a573a364d6f732c437e426944af14 Mon Sep 17 00:00:00 2001 From: jorvis Date: Fri, 13 Sep 2019 21:56:27 -0500 Subject: [PATCH 2/8] PEP8 fix - line too long nonsense --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7b5feca3605d2..4db03bc7bcfef 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6274,7 +6274,8 @@ def fillna( ) new_data = self._data.fillna( - value=value, axis=axis, limit=limit, inplace=inplace, downcast=downcast + value=value, axis=axis, limit=limit, inplace=inplace, + downcast=downcast ) elif isinstance(value, (dict, ABCSeries)): From 6837029e76195d66513e94a058180dc7b8de38bc Mon Sep 17 00:00:00 2001 From: jorvis Date: Sun, 15 Sep 2019 22:04:06 -0500 Subject: [PATCH 3/8] Re-ordered arguments so axis was last --- pandas/core/generic.py | 4 ++-- pandas/core/internals/blocks.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4db03bc7bcfef..e2894c6319667 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6274,8 +6274,8 @@ def fillna( ) new_data = self._data.fillna( - value=value, axis=axis, limit=limit, inplace=inplace, - downcast=downcast + value=value, limit=limit, inplace=inplace, + downcast=downcast, axis=axis ) elif isinstance(value, (dict, ABCSeries)): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d1e081b8217ea..2bcd217c2dc4d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -386,7 +386,7 @@ def apply(self, func, **kwargs): return result - def fillna(self, value, axis=0, limit=None, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace=False, downcast=None, axis=0): """ fillna on the block with the value. If we fail, then convert to ObjectBlock and try again """ @@ -1839,7 +1839,7 @@ def concat_same_type(self, to_concat, placement=None): placement = placement or slice(0, len(values), 1) return self.make_block_same_class(values, ndim=self.ndim, placement=placement) - def fillna(self, value, axis=0, limit=None, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace=False, downcast=None, axis=0): values = self.values if inplace else self.values.copy() values = values.fillna(value=value, limit=limit) return [ @@ -2403,15 +2403,15 @@ def concat_same_type(self, to_concat, placement=None): return ObjectBlock(values, ndim=self.ndim, placement=placement) return super().concat_same_type(to_concat, placement) - def fillna(self, value, axis=0, limit=None, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace=False, downcast=None, axis=0): # We support filling a DatetimeTZ with a `value` whose timezone # is different by coercing to object. if self._can_hold_element(value): - return super().fillna(value, axis, limit, inplace, downcast) + return super().fillna(value, limit, inplace, downcast, axis) # different timezones, or a non-tz return self.astype(object).fillna( - value, axis=axis, limit=limit, inplace=inplace, downcast=downcast + value, limit=limit, inplace=inplace, downcast=downcast, axis=axis ) def setitem(self, indexer, value): From 38c40d96f7225c554230284f3112bf5a26de0129 Mon Sep 17 00:00:00 2001 From: jorvis Date: Sun, 15 Sep 2019 23:20:32 -0500 Subject: [PATCH 4/8] Added entry to whatsnew --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9840e4e94d28c..f24bfe25cc6ef 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -233,7 +233,7 @@ Other - Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) - Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) -- +- :meth:`DataFrame.fillna` when using axis=1 previously failed to replace NaN values (:issue:`17399` and :issue:`17409`) .. _whatsnew_1000.contributors: From 7536477b86b46f3bd6727e57b2abc1b891c6cf4b Mon Sep 17 00:00:00 2001 From: jorvis Date: Sun, 15 Sep 2019 23:26:27 -0500 Subject: [PATCH 5/8] Added variable annotation for axis references --- pandas/core/internals/blocks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2bcd217c2dc4d..56793fe83e420 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -81,6 +81,7 @@ from pandas.core.nanops import nanpercentile from pandas.io.formats.printing import pprint_thing +from pandas_typing import Axis class Block(PandasObject): @@ -386,7 +387,7 @@ def apply(self, func, **kwargs): return result - def fillna(self, value, limit=None, inplace=False, downcast=None, axis=0): + def fillna(self, value, limit=None, inplace=False, downcast=None, axis: Axis = 0): """ fillna on the block with the value. If we fail, then convert to ObjectBlock and try again """ @@ -1839,7 +1840,7 @@ def concat_same_type(self, to_concat, placement=None): placement = placement or slice(0, len(values), 1) return self.make_block_same_class(values, ndim=self.ndim, placement=placement) - def fillna(self, value, limit=None, inplace=False, downcast=None, axis=0): + def fillna(self, value, limit=None, inplace=False, downcast=None, axis: Axis = 0): values = self.values if inplace else self.values.copy() values = values.fillna(value=value, limit=limit) return [ @@ -2403,7 +2404,7 @@ def concat_same_type(self, to_concat, placement=None): return ObjectBlock(values, ndim=self.ndim, placement=placement) return super().concat_same_type(to_concat, placement) - def fillna(self, value, limit=None, inplace=False, downcast=None, axis=0): + def fillna(self, value, limit=None, inplace=False, downcast=None, axis: Axis = 0): # We support filling a DatetimeTZ with a `value` whose timezone # is different by coercing to object. if self._can_hold_element(value): From df263866430e769de17f84daaadc7d1d217c7d0c Mon Sep 17 00:00:00 2001 From: jorvis Date: Sun, 15 Sep 2019 23:29:35 -0500 Subject: [PATCH 6/8] Fixed duplicate function name, renaming 2nd test_fillna_columns() to test_fillna_rows(), added comment to reference bug --- pandas/tests/frame/test_missing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 5469de28f9019..6fff1bde39970 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -671,7 +671,8 @@ def test_fillna_columns(self): expected = df.astype(float).fillna(method="ffill", axis=1) assert_frame_equal(result, expected) - def test_fillna_columns(self): + def test_fillna_rows(self): + #GH17399 df = DataFrame(np.random.randn(10, 4)) df.iloc[1:4, 1:4] = nan expected = df.copy() From 313e93491b5f0392bdb9e302d0e085690565b124 Mon Sep 17 00:00:00 2001 From: jorvis Date: Mon, 16 Sep 2019 11:04:02 -0500 Subject: [PATCH 7/8] Added one more annotation for axis --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e2894c6319667..da95e854908d5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6275,7 +6275,7 @@ def fillna( new_data = self._data.fillna( value=value, limit=limit, inplace=inplace, - downcast=downcast, axis=axis + downcast=downcast, axis: Axis=axis ) elif isinstance(value, (dict, ABCSeries)): From e1863faa822c45a927d924c86f35b044729d0d61 Mon Sep 17 00:00:00 2001 From: jorvis Date: Thu, 19 Sep 2019 16:56:37 -0500 Subject: [PATCH 8/8] Updated test to use literal values, use row means --- pandas/tests/frame/test_missing.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 6fff1bde39970..f9ffaef659bb4 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -673,11 +673,17 @@ def test_fillna_columns(self): def test_fillna_rows(self): #GH17399 - df = DataFrame(np.random.randn(10, 4)) - df.iloc[1:4, 1:4] = nan - expected = df.copy() - expected.iloc[1:4, 1:4] = 0 - result = df.fillna(value=0, axis=1) + df = pd.DataFrame({ + "a": [1,2,3,4], + "b": [5,np.nan,7,8], + "c": [9,10,11,np.nan]}) + + expected = pd.DataFrame({ + "a": [1,2,3,4], + "b": [5,6,7,8], + "c": [9,10,11,6]}) + + result = df.fillna(df.mean(axis=1)) tm.assert_frame_equal(result, expected) def test_fillna_invalid_method(self, float_frame):