Skip to content

ENH: implement fill_value for df.add(other=Series) #13488 #32335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/source/whatsnew/v1.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ including other versions of pandas.

.. ---------------------------------------------------------------------------

.. _whatsnew_102.enhancements:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would go in the 1.1.0 whatsnew file.


Enhancements
~~~~~~~~~~~~

- :meth:`DataFrame.add` now accepts a ``fill_value`` other than ``None`` when ``other`` is a :class:`Series`.
  The same enhancement applies to the other flexible binary operators: :meth:`~DataFrame.sub`, :meth:`~DataFrame.mul`, :meth:`~DataFrame.div`, :meth:`~DataFrame.truediv`, :meth:`~DataFrame.floordiv`, :meth:`~DataFrame.mod`, :meth:`~DataFrame.pow`. (:issue:`13488`)

.. ---------------------------------------------------------------------------

.. _whatsnew_102.regressions:

Fixed regressions
Expand Down
43 changes: 34 additions & 9 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@
from pandas._typing import ArrayLike, Level
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype
from pandas.core.dtypes.common import (
is_bool,
is_list_like,
is_number,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -341,7 +346,11 @@ def fill_binop(left, right, fill_value):
left = left.copy()
left[left_mask & mask] = fill_value

if right_mask.any():
if is_bool(right_mask):
if right_mask:
right = left._constructor(right, index=left.index)
right[right_mask & mask] = fill_value
elif right_mask.any():
# Avoid making a copy if we can
right = right.copy()
right[right_mask & mask] = fill_value
Expand Down Expand Up @@ -585,7 +594,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
# DataFrame


def _combine_series_frame(left, right, func, axis: int):
def _combine_series_frame(left, right, func, axis: int, fill_value=None):
"""
Apply binary operator `func` to self, other using alignment and fill
conventions determined by the axis argument.
Expand All @@ -596,16 +605,29 @@ def _combine_series_frame(left, right, func, axis: int):
right : Series
func : binary operator
axis : {0, 1}
fill_value : numeric, optional

Returns
-------
result : DataFrame
"""
if fill_value is None:
_arith_op = func

else:

def _arith_op(left, right):
left, right = fill_binop(left, right, fill_value)
return func(left, right)

# We assume that self.align(other, ...) has already been called
if axis == 0:
new_data = left._combine_match_index(right, func)
if fill_value is not None:
new_data = dispatch_to_series(left, right, _arith_op, axis=0)
else:
new_data = left._combine_match_index(right, _arith_op)
else:
new_data = dispatch_to_series(left, right, func, axis="columns")
new_data = dispatch_to_series(left, right, _arith_op, axis="columns")

return left._construct_result(new_data)

Expand Down Expand Up @@ -771,6 +793,12 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level):
return _frame_arith_method_with_reindex(self, other, op)

if not is_number(fill_value) and fill_value is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this condition is right. What if the Series we are working with is, e.g., datetime64?

raise TypeError(
"fill_value must be numeric or None. "
f"Got {type(fill_value).__name__}"
)

self, other = _align_method_FRAME(self, other, axis, flex=True, level=level)

if isinstance(other, ABCDataFrame):
Expand All @@ -787,11 +815,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
pass_op = op if axis in [0, "columns", None] else na_op
pass_op = pass_op if not is_logical else op

if fill_value is not None:
raise NotImplementedError(f"fill_value {fill_value} not supported.")

axis = self._get_axis_number(axis) if axis is not None else 1
return _combine_series_frame(self, other, pass_op, axis=axis)
return _combine_series_frame(self, other, pass_op, axis, fill_value)
else:
# in this case we always have `np.ndim(other) == 0`
if fill_value is not None:
Expand Down
30 changes: 30 additions & 0 deletions pandas/core/ops/docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,36 @@ def _make_flex_doc(op_name, typ):
triangle 4 181
rectangle 5 361

Add Series by axis, with and without ``fill_value`` for missing values.

>>> a = pd.Series([2, 4], index=['circle', 'triangle'])

>>> df.add(a, axis=0)
angles degrees
circle 2.0 362.0
rectangle NaN NaN
triangle 7.0 184.0

>>> df.add(a, axis=0, fill_value=1)
angles degrees
circle 2.0 362.0
rectangle 5.0 361.0
triangle 7.0 184.0

>>> b = pd.Series([3, 6, 9], index=["angles", "degrees", "scale"])

>>> df.add(b)
angles degrees scale
circle 3 366 NaN
triangle 6 186 NaN
rectangle 7 366 NaN

>>> df.add(b, fill_value=1)
angles degrees scale
circle 3 366 10.0
triangle 6 186 10.0
rectangle 7 366 10.0

Divide by constant with reverse version.

>>> df.div(10)
Expand Down
76 changes: 57 additions & 19 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,12 +453,6 @@ def test_arith_flex_frame_corner(self, float_frame):
result = float_frame[:0].add(float_frame)
tm.assert_frame_equal(result, float_frame * np.nan)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], fill_value=3)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], axis="index", fill_value=3)

def test_arith_flex_series(self, simple_frame):
df = simple_frame

Expand Down Expand Up @@ -490,19 +484,6 @@ def test_arith_flex_series(self, simple_frame):
result = df.div(df[0], axis="index")
tm.assert_frame_equal(result, expected)

def test_arith_flex_zero_len_raises(self):
# GH 19522 passing fill_value to frame flex arith methods should
# raise even in the zero-length special cases
ser_len0 = pd.Series([], dtype=object)
df_len0 = pd.DataFrame(columns=["A", "B"])
df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

with pytest.raises(NotImplementedError, match="fill_value"):
df.add(ser_len0, fill_value="E")

with pytest.raises(NotImplementedError, match="fill_value"):
df_len0.sub(df["A"], axis=None, fill_value=3)


class TestFrameArithmetic:
def test_td64_op_nat_casting(self):
Expand Down Expand Up @@ -774,6 +755,63 @@ def test_frame_single_columns_object_sum_axis_1():
tm.assert_series_equal(result, expected)


@pytest.fixture
def simple_frame_with_na():
    """
    Return a 3x3 float DataFrame containing two missing values.

    Rows are labelled "a", "b", "c" and columns are the integers 0, 1, 2.
    The NaNs sit at ("a", 0) and ("b", 1).
    """
    values = [
        [np.nan, 2.0, 3.0],
        [4.0, np.nan, 6.0],
        [7.0, 8.0, 9.0],
    ]
    return pd.DataFrame(values, index=["a", "b", "c"], columns=np.arange(3))


@pytest.mark.parametrize(
    "axis, series, expected",
    [
        (
            # Align on the index: the Series adds a new row label "d".
            0,
            pd.Series([1.0, np.nan, 3.0, 4.0], index=["a", "b", "c", "d"]),
            pd.DataFrame(
                [
                    [2.0, 3.0, 4.0],
                    # ("b", 1) is NaN on both sides, so it stays NaN.
                    [5.0, np.nan, 7.0],
                    [10.0, 11.0, 12.0],
                    # Row "d" is missing from the frame: fill_value + 4.0.
                    [5.0, 5.0, 5.0],
                ],
                columns=np.arange(3),
                index=["a", "b", "c", "d"],
            ),
        ),
        (
            # Align on the columns: the Series adds a new column label 3.
            "columns",
            pd.Series([np.nan, 2.0, np.nan, 4.0], index=np.arange(4)),
            pd.DataFrame(
                # ("a", 0) is NaN on both sides, so it stays NaN.
                [[np.nan, 4.0, 4.0, 5.0], [5.0, 3.0, 7.0, 5.0], [8.0, 10.0, 10.0, 5.0]],
                index=["a", "b", "c"],
                columns=np.arange(4),
            ),
        ),
    ],
)
def test_add_series_to_frame_with_fill(simple_frame_with_na, axis, series, expected):
    # GH#13488: when adding a Series to a DataFrame, a value missing on
    # exactly one side is replaced by fill_value before the addition;
    # positions missing on both sides remain NaN.
    result = simple_frame_with_na.add(other=series, axis=axis, fill_value=1)
    tm.assert_frame_equal(result, expected)


def test_df_add_with_non_numeric_fill(simple_frame):
    # GH#13488: a non-numeric fill_value must raise when adding a Series
    # to a DataFrame. This takes over the non-numeric check from the
    # removed test_arith_flex_zero_len_raises.
    other = pd.Series([1.0, np.nan, 3.0], index=["a", "b", "c"])

    with pytest.raises(TypeError, match="fill_value"):
        simple_frame.add(other, fill_value="E")


# -------------------------------------------------------------------
# Unsorted
# These arithmetic tests were previously in other files, eventually
Expand Down