Skip to content

CLN: always dispatch-to-series #34286

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 23, 2020
10 changes: 1 addition & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@
sanitize_index,
to_arrays,
)
from pandas.core.ops.missing import dispatch_fill_zeros
from pandas.core.series import Series
from pandas.core.sorting import ensure_key_mapped

Expand Down Expand Up @@ -5733,14 +5732,7 @@ def _arith_op(left, right):
left, right = ops.fill_binop(left, right, fill_value)
return func(left, right)

if ops.should_series_dispatch(self, other, func):
# iterate over columns
new_data = ops.dispatch_to_series(self, other, _arith_op)
else:
with np.errstate(all="ignore"):
res_values = _arith_op(self.values, other.values)
new_data = dispatch_fill_zeros(func, self.values, other.values, res_values)

new_data = ops.dispatch_to_series(self, other, _arith_op)
return new_data

def _construct_result(self, result) -> "DataFrame":
Expand Down
30 changes: 20 additions & 10 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,12 @@
from pandas.core.ops.array_ops import (
arithmetic_op,
comparison_op,
define_na_arithmetic_op,
get_array_op,
logical_op,
)
from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401
from pandas.core.ops.blockwise import operate_blockwise
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.dispatch import should_series_dispatch
from pandas.core.ops.docstrings import (
_arith_doc_FRAME,
_flex_comp_doc_FRAME,
Expand Down Expand Up @@ -155,7 +153,7 @@ def _maybe_match_name(a, b):
# -----------------------------------------------------------------------------


def _get_frame_op_default_axis(name):
def _get_frame_op_default_axis(name: str) -> Optional[str]:
"""
Only DataFrame cares about default_axis, specifically:
special methods have default_axis=None and flex methods
Expand Down Expand Up @@ -324,7 +322,10 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None):
return type(left)(bm)

elif isinstance(right, ABCDataFrame):
assert right._indexed_same(left)
assert left.index.equals(right.index)
assert left.columns.equals(right.columns)
# TODO: The previous assertion `assert right._indexed_same(left)`
# fails in cases with shaoe[1] == 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really understand this comment

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

im finding cases where left.columns and right.columns are both empty but different dtypes, which still satisfy left.columns.equals(right.columns), but not right._indexed_same(left). looks like this is an artifact of the intersection done in _should_reindex_frame_op, looking into this now. (also will fix the typo shaoe -> shape)


array_op = get_array_op(func, str_rep=str_rep)
bm = operate_blockwise(left, right, array_op)
Expand Down Expand Up @@ -392,6 +393,7 @@ def _arith_method_SERIES(cls, op, special):
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""
assert special # non-special uses _flex_method_SERIES
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)

Expand All @@ -416,6 +418,7 @@ def _comp_method_SERIES(cls, op, special):
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""
assert special # non-special uses _flex_method_SERIES
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)

Expand Down Expand Up @@ -443,6 +446,7 @@ def _bool_method_SERIES(cls, op, special):
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""
assert special # non-special uses _flex_method_SERIES
op_name = _get_op_name(op, special)

@unpack_zerodim_and_defer(op_name)
Expand All @@ -461,6 +465,7 @@ def wrapper(self, other):


def _flex_method_SERIES(cls, op, special):
assert not special # "special" also means "not flex"
name = _get_op_name(op, special)
doc = _make_flex_doc(name, "series")

Expand Down Expand Up @@ -624,7 +629,7 @@ def to_series(right):


def _should_reindex_frame_op(
left: "DataFrame", right, op, axis, default_axis: int, fill_value, level
left: "DataFrame", right, op, axis, default_axis, fill_value, level
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this changed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

without this i got mypy complaints that i havent been able to track down

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you share the error message?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pandas/core/ops/__init__.py:706: error: Argument 5 to "_should_reindex_frame_op" has incompatible type "Optional[str]"; expected "int"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha. Looks like _get_frame_op_default_axis returns an Optional string and that is used as the default argument in the f function local to _arith_method_FRAME.

So makes sense Mypy would complain about that. Whether the existing annotation is wrong or if its discovering a bug for us I'm not sure

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think its the annotation. there is some more code that can be ripped out after this, so im planning to do an annotation pass towards the end

) -> bool:
"""
Check if this is an operation between DataFrames that will need to reindex.
Expand Down Expand Up @@ -680,11 +685,12 @@ def _frame_arith_method_with_reindex(


def _arith_method_FRAME(cls, op, special):
# This is the only function where `special` can be either True or False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you type if possible

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)
default_axis = _get_frame_op_default_axis(op_name)

na_op = define_na_arithmetic_op(op, str_rep)
na_op = get_array_op(op, str_rep)
is_logical = str_rep in ["&", "|", "^"]

if op_name in _op_descriptions:
Expand All @@ -701,18 +707,19 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
):
return _frame_arith_method_with_reindex(self, other, op)

# TODO: why are we passing flex=True instead of flex=not special?
# 15 tests fail if we pass flex=not special instead
self, other = _align_method_FRAME(self, other, axis, flex=True, level=level)

if isinstance(other, ABCDataFrame):
# Another DataFrame
pass_op = op if should_series_dispatch(self, other, op) else na_op
pass_op = pass_op if not is_logical else op

new_data = self._combine_frame(other, pass_op, fill_value)
new_data = self._combine_frame(other, na_op, fill_value)

elif isinstance(other, ABCSeries):
# For these values of `axis`, we end up dispatching to Series op,
# so do not want the masked op.
# TODO: the above comment is no longer accurate since we now
# operate blockwise if other._values is an ndarray
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you update the comment instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

simpler to do this in the next pass, which will make us always pass the same thing, at which point we can remove the comment entirely

pass_op = op if axis in [0, "columns", None] else na_op
pass_op = pass_op if not is_logical else op

Expand All @@ -738,9 +745,11 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):


def _flex_comp_method_FRAME(cls, op, special):
assert not special # "special" also means "not flex"
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)
default_axis = _get_frame_op_default_axis(op_name)
assert default_axis == "columns", default_axis # because we are not "special"

doc = _flex_comp_doc_FRAME.format(
op_name=op_name, desc=_op_descriptions[op_name]["desc"]
Expand Down Expand Up @@ -772,6 +781,7 @@ def f(self, other, axis=default_axis, level=None):


def _comp_method_FRAME(cls, op, special):
assert special # "special" also means "not flex"
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)

Expand Down
14 changes: 6 additions & 8 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,6 @@ def masked_arith_op(x: np.ndarray, y, op):
return result


def define_na_arithmetic_op(op, str_rep: Optional[str]):
def na_op(x, y):
return na_arithmetic_op(x, y, op, str_rep)

return na_op


def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = False):
"""
Return the result of evaluating op on the passed in values.
Expand Down Expand Up @@ -386,8 +379,13 @@ def get_array_op(op, str_rep: Optional[str] = None):

Returns
-------
function
functools.partial
"""
if isinstance(op, partial):
# We get here via dispatch_to_series in DataFrame case
# TODO: avoid getting here
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is an example with which you get this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In _arith_method_FRAME we pass na_op (which is one a partial returned by this function here) to _combine_frame, which in turn passes na_op to dispatch_to_series, which then passes na_op back to this function

return op

op_name = op.__name__.strip("_")
if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}:
return partial(comparison_op, op=op, str_rep=str_rep)
Expand Down
60 changes: 0 additions & 60 deletions pandas/core/ops/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,6 @@

from pandas._typing import ArrayLike

from pandas.core.dtypes.common import (
is_datetime64_dtype,
is_integer_dtype,
is_object_dtype,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import ABCExtensionArray


Expand All @@ -28,57 +22,3 @@ def should_extension_dispatch(left: ArrayLike, right: Any) -> bool:
bool
"""
return isinstance(left, ABCExtensionArray) or isinstance(right, ABCExtensionArray)


def should_series_dispatch(left, right, op):
"""
Identify cases where a DataFrame operation should dispatch to its
Series counterpart.

Parameters
----------
left : DataFrame
right : DataFrame or Series
op : binary operator

Returns
-------
override : bool
"""
if left._is_mixed_type or right._is_mixed_type:
return True

if op.__name__.strip("_") in ["and", "or", "xor", "rand", "ror", "rxor"]:
# TODO: GH references for what this fixes
# Note: this check must come before the check for nonempty columns.
return True

if right.ndim == 1:
# operating with Series, short-circuit checks that would fail
# with AttributeError.
return False

if not len(left.columns) or not len(right.columns):
# ensure obj.dtypes[0] exists for each obj
return False

ldtype = left.dtypes.iloc[0]
rdtype = right.dtypes.iloc[0]

if (
is_timedelta64_dtype(ldtype)
and (is_integer_dtype(rdtype) or is_object_dtype(rdtype))
) or (
is_timedelta64_dtype(rdtype)
and (is_integer_dtype(ldtype) or is_object_dtype(ldtype))
):
# numpy integer dtypes as timedelta64 dtypes in this scenario
return True

if (is_datetime64_dtype(ldtype) and is_object_dtype(rdtype)) or (
is_datetime64_dtype(rdtype) and is_object_dtype(ldtype)
):
# in particular case where one is an array of DateOffsets
return True

return False
1 change: 0 additions & 1 deletion pandas/core/ops/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special):
dict(
and_=bool_method(cls, operator.and_, special),
or_=bool_method(cls, operator.or_, special),
# For some reason ``^`` wasn't used in original.
xor=bool_method(cls, operator.xor, special),
rand_=bool_method(cls, rand_, special),
ror_=bool_method(cls, ror_, special),
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,10 @@ def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture):
obj = tm.box_expected(tdi, box)
other = tm.box_expected(dti, box)

with tm.assert_produces_warning(PerformanceWarning):
warn = None
if box is not pd.DataFrame or tz_naive_fixture is None:
warn = PerformanceWarning
with tm.assert_produces_warning(warn):
result = obj + other.astype(object)
tm.assert_equal(result, other)

Expand Down