From 2930f1e57c4a2b7a8540e7c5b4490084ecde3ca3 Mon Sep 17 00:00:00 2001 From: jonathanrhughes <57222544+jonathanrhughes@users.noreply.github.com> Date: Fri, 28 Feb 2020 15:20:59 +0000 Subject: [PATCH] ENH: implement fill_value for df.add(other=Series) #13488 --- doc/source/whatsnew/v1.0.2.rst | 10 ++++ pandas/core/ops/__init__.py | 43 +++++++++++---- pandas/core/ops/docstrings.py | 30 +++++++++++ pandas/tests/frame/test_arithmetic.py | 76 ++++++++++++++++++++------- 4 files changed, 131 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 1b6098e6b6ac1..0c6c3caf11b09 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -10,6 +10,16 @@ including other versions of pandas. .. --------------------------------------------------------------------------- +.. _whatsnew_102.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- :meth:`DataFrame.add` now accepts a ``fill_value`` other than ``None`` when ``other`` is a :class:`Series`. + The same enhancement is also available for the other flex arithmetic methods: :meth:`~DataFrame.sub`, :meth:`~DataFrame.mul`, :meth:`~DataFrame.div`, :meth:`~DataFrame.truediv`, :meth:`~DataFrame.floordiv`, :meth:`~DataFrame.mod`, :meth:`~DataFrame.pow`. (:issue:`13488`) + +.. --------------------------------------------------------------------------- + ..
_whatsnew_102.regressions: Fixed regressions diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d0adf2da04db3..03d793d386acd 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -14,7 +14,12 @@ from pandas._typing import ArrayLike, Level from pandas.util._decorators import Appender -from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype +from pandas.core.dtypes.common import ( + is_bool, + is_list_like, + is_number, + is_timedelta64_dtype, +) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -341,7 +346,11 @@ def fill_binop(left, right, fill_value): left = left.copy() left[left_mask & mask] = fill_value - if right_mask.any(): + if is_bool(right_mask): + if right_mask: + right = left._constructor(right, index=left.index) + right[right_mask & mask] = fill_value + elif right_mask.any(): # Avoid making a copy if we can right = right.copy() right[right_mask & mask] = fill_value @@ -585,7 +594,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # DataFrame -def _combine_series_frame(left, right, func, axis: int): +def _combine_series_frame(left, right, func, axis: int, fill_value=None): """ Apply binary operator `func` to self, other using alignment and fill conventions determined by the axis argument. @@ -596,16 +605,29 @@ def _combine_series_frame(left, right, func, axis: int): right : Series func : binary operator axis : {0, 1} + fill_value : numeric, optional Returns ------- result : DataFrame """ + if fill_value is None: + _arith_op = func + + else: + + def _arith_op(left, right): + left, right = fill_binop(left, right, fill_value) + return func(left, right) + # We assume that self.align(other, ...) 
has already been called if axis == 0: - new_data = left._combine_match_index(right, func) + if fill_value is not None: + new_data = dispatch_to_series(left, right, _arith_op, axis=0) + else: + new_data = left._combine_match_index(right, _arith_op) else: - new_data = dispatch_to_series(left, right, func, axis="columns") + new_data = dispatch_to_series(left, right, _arith_op, axis="columns") return left._construct_result(new_data) @@ -771,6 +793,12 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level): return _frame_arith_method_with_reindex(self, other, op) + if not is_number(fill_value) and fill_value is not None: + raise TypeError( + "fill_value must be numeric or None. " + f"Got {type(fill_value).__name__}" + ) + self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): @@ -787,11 +815,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): pass_op = op if axis in [0, "columns", None] else na_op pass_op = pass_op if not is_logical else op - if fill_value is not None: - raise NotImplementedError(f"fill_value {fill_value} not supported.") - axis = self._get_axis_number(axis) if axis is not None else 1 - return _combine_series_frame(self, other, pass_op, axis=axis) + return _combine_series_frame(self, other, pass_op, axis, fill_value) else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index 203ea3946d1b2..7685373ca19e5 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -478,6 +478,36 @@ def _make_flex_doc(op_name, typ): triangle 4 181 rectangle 5 361 +Add Series by axis when values are missing + +>>> a = pd.Series([2, 4], index=['circle', 'triangle']) + +>>> df.add(a, axis=0) + angles degrees +circle 2.0 362.0 +rectangle NaN NaN +triangle 7.0 184.0 + +>>> 
df.add(a, axis=0, fill_value=1) + angles degrees +circle 2.0 362.0 +rectangle 5.0 361.0 +triangle 7.0 184.0 + +>>> b = pd.Series([3, 6, 9], index=["angles", "degrees", "scale"]) + +>>> df.add(b) + angles degrees scale +circle 3 366 NaN +triangle 6 186 NaN +rectangle 7 366 NaN + +>>> df.add(b, fill_value=1) + angles degrees scale +circle 3 366 10.0 +triangle 6 186 10.0 +rectangle 7 366 10.0 + Divide by constant with reverse version. >>> df.div(10) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index e4be8a979a70f..9b46e6a509919 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -453,12 +453,6 @@ def test_arith_flex_frame_corner(self, float_frame): result = float_frame[:0].add(float_frame) tm.assert_frame_equal(result, float_frame * np.nan) - with pytest.raises(NotImplementedError, match="fill_value"): - float_frame.add(float_frame.iloc[0], fill_value=3) - - with pytest.raises(NotImplementedError, match="fill_value"): - float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) - def test_arith_flex_series(self, simple_frame): df = simple_frame @@ -490,19 +484,6 @@ def test_arith_flex_series(self, simple_frame): result = df.div(df[0], axis="index") tm.assert_frame_equal(result, expected) - def test_arith_flex_zero_len_raises(self): - # GH 19522 passing fill_value to frame flex arith methods should - # raise even in the zero-length special cases - ser_len0 = pd.Series([], dtype=object) - df_len0 = pd.DataFrame(columns=["A", "B"]) - df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - - with pytest.raises(NotImplementedError, match="fill_value"): - df.add(ser_len0, fill_value="E") - - with pytest.raises(NotImplementedError, match="fill_value"): - df_len0.sub(df["A"], axis=None, fill_value=3) - class TestFrameArithmetic: def test_td64_op_nat_casting(self): @@ -774,6 +755,63 @@ def test_frame_single_columns_object_sum_axis_1(): tm.assert_series_equal(result, expected) 
+@pytest.fixture
+def simple_frame_with_na():
+    """3x3 float frame (rows a-c, columns 0-2) with NaN at (a, 0) and (b, 1)."""
+    return pd.DataFrame(
+        [[np.nan, 2.0, 3.0], [4.0, np.nan, 6.0], [7.0, 8.0, 9.0]],
+        index=["a", "b", "c"],
+        columns=np.arange(3),
+    )
+
+
+@pytest.mark.parametrize(
+    "axis, series, expected",
+    [
+        (
+            0,
+            pd.Series([1.0, np.nan, 3.0, 4.0], index=["a", "b", "c", "d"]),
+            pd.DataFrame(
+                [
+                    [2.0, 3.0, 4.0],
+                    [5.0, np.nan, 7.0],
+                    [10.0, 11.0, 12.0],
+                    [5.0, 5.0, 5.0],
+                ],
+                columns=np.arange(3),
+                index=["a", "b", "c", "d"],
+            ),
+        ),
+        (
+            "columns",
+            pd.Series([np.nan, 2.0, np.nan, 4.0], index=np.arange(4)),
+            pd.DataFrame(
+                [[np.nan, 4.0, 4.0, 5.0], [5.0, 3.0, 7.0, 5.0], [8.0, 10.0, 10.0, 5.0]],
+                index=["a", "b", "c"],
+                columns=np.arange(4),
+            ),
+        ),
+    ],
+)
+def test_add_series_to_frame_with_fill(simple_frame_with_na, axis, series, expected):
+    # GH#13488: when adding a Series to a frame with fill_value, a position
+    # missing on exactly one side is filled before the op; a position
+    # missing on both sides stays NaN.
+    df = simple_frame_with_na
+    result = df.add(other=series, axis=axis, fill_value=1)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_df_add_with_non_numeric_fill(simple_frame):
+    # GH#13488: a non-numeric fill_value must raise TypeError for frame + Series.
+    # Replaces the non-numeric check of the removed test_arith_flex_zero_len_raises.
+    df = simple_frame
+    ser = pd.Series([1.0, np.nan, 3.0], index=["a", "b", "c"])
+
+    with pytest.raises(TypeError, match="fill_value"):
+        df.add(ser, fill_value="E")
+
+
 # -------------------------------------------------------------------
 # Unsorted
 # These arithmetic tests were previously in other files, eventually