-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
CLN: always dispatch-to-series #34286
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
907e3c6
345ef53
1c41e99
971a55f
b75a31a
117471d
b4fc220
6c76148
5f34f3a
9ca0e1f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,14 +21,12 @@ | |
from pandas.core.ops.array_ops import ( | ||
arithmetic_op, | ||
comparison_op, | ||
define_na_arithmetic_op, | ||
get_array_op, | ||
logical_op, | ||
) | ||
from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401 | ||
from pandas.core.ops.blockwise import operate_blockwise | ||
from pandas.core.ops.common import unpack_zerodim_and_defer | ||
from pandas.core.ops.dispatch import should_series_dispatch | ||
from pandas.core.ops.docstrings import ( | ||
_arith_doc_FRAME, | ||
_flex_comp_doc_FRAME, | ||
|
@@ -155,7 +153,7 @@ def _maybe_match_name(a, b): | |
# ----------------------------------------------------------------------------- | ||
|
||
|
||
def _get_frame_op_default_axis(name): | ||
def _get_frame_op_default_axis(name: str) -> Optional[str]: | ||
""" | ||
Only DataFrame cares about default_axis, specifically: | ||
special methods have default_axis=None and flex methods | ||
|
@@ -324,7 +322,10 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): | |
return type(left)(bm) | ||
|
||
elif isinstance(right, ABCDataFrame): | ||
assert right._indexed_same(left) | ||
assert left.index.equals(right.index) | ||
assert left.columns.equals(right.columns) | ||
# TODO: The previous assertion `assert right._indexed_same(left)` | ||
# fails in cases with shaoe[1] == 0 | ||
|
||
array_op = get_array_op(func, str_rep=str_rep) | ||
bm = operate_blockwise(left, right, array_op) | ||
|
@@ -392,6 +393,7 @@ def _arith_method_SERIES(cls, op, special): | |
Wrapper function for Series arithmetic operations, to avoid | ||
code duplication. | ||
""" | ||
assert special # non-special uses _flex_method_SERIES | ||
str_rep = _get_opstr(op) | ||
op_name = _get_op_name(op, special) | ||
|
||
|
@@ -416,6 +418,7 @@ def _comp_method_SERIES(cls, op, special): | |
Wrapper function for Series arithmetic operations, to avoid | ||
code duplication. | ||
""" | ||
assert special # non-special uses _flex_method_SERIES | ||
str_rep = _get_opstr(op) | ||
op_name = _get_op_name(op, special) | ||
|
||
|
@@ -443,6 +446,7 @@ def _bool_method_SERIES(cls, op, special): | |
Wrapper function for Series arithmetic operations, to avoid | ||
code duplication. | ||
""" | ||
assert special # non-special uses _flex_method_SERIES | ||
op_name = _get_op_name(op, special) | ||
|
||
@unpack_zerodim_and_defer(op_name) | ||
|
@@ -461,6 +465,7 @@ def wrapper(self, other): | |
|
||
|
||
def _flex_method_SERIES(cls, op, special): | ||
assert not special # "special" also means "not flex" | ||
name = _get_op_name(op, special) | ||
doc = _make_flex_doc(name, "series") | ||
|
||
|
@@ -624,7 +629,7 @@ def to_series(right): | |
|
||
|
||
def _should_reindex_frame_op( | ||
left: "DataFrame", right, op, axis, default_axis: int, fill_value, level | ||
left: "DataFrame", right, op, axis, default_axis, fill_value, level | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this changed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. without this i got mypy complaints that i havent been able to track down There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you share the error message? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gotcha. Looks like So makes sense Mypy would complain about that. Whether the existing annotation is wrong or if its discovering a bug for us I'm not sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think its the annotation. there is some more code that can be ripped out after this, so im planning to do an annotation pass towards the end |
||
) -> bool: | ||
""" | ||
Check if this is an operation between DataFrames that will need to reindex. | ||
|
@@ -680,11 +685,12 @@ def _frame_arith_method_with_reindex( | |
|
||
|
||
def _arith_method_FRAME(cls, op, special): | ||
# This is the only function where `special` can be either True or False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you type if possible There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
str_rep = _get_opstr(op) | ||
op_name = _get_op_name(op, special) | ||
default_axis = _get_frame_op_default_axis(op_name) | ||
|
||
na_op = define_na_arithmetic_op(op, str_rep) | ||
na_op = get_array_op(op, str_rep) | ||
is_logical = str_rep in ["&", "|", "^"] | ||
|
||
if op_name in _op_descriptions: | ||
|
@@ -701,18 +707,19 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): | |
): | ||
return _frame_arith_method_with_reindex(self, other, op) | ||
|
||
# TODO: why are we passing flex=True instead of flex=not special? | ||
# 15 tests fail if we pass flex=not special instead | ||
self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) | ||
|
||
if isinstance(other, ABCDataFrame): | ||
# Another DataFrame | ||
pass_op = op if should_series_dispatch(self, other, op) else na_op | ||
pass_op = pass_op if not is_logical else op | ||
|
||
new_data = self._combine_frame(other, pass_op, fill_value) | ||
new_data = self._combine_frame(other, na_op, fill_value) | ||
|
||
elif isinstance(other, ABCSeries): | ||
# For these values of `axis`, we end up dispatching to Series op, | ||
# so do not want the masked op. | ||
# TODO: the above comment is no longer accurate since we now | ||
# operate blockwise if other._values is an ndarray | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you update the comment instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. simpler to do this in the next pass, which will make us always pass the same thing, at which point we can remove the comment entirely |
||
pass_op = op if axis in [0, "columns", None] else na_op | ||
pass_op = pass_op if not is_logical else op | ||
|
||
|
@@ -738,9 +745,11 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): | |
|
||
|
||
def _flex_comp_method_FRAME(cls, op, special): | ||
assert not special # "special" also means "not flex" | ||
str_rep = _get_opstr(op) | ||
op_name = _get_op_name(op, special) | ||
default_axis = _get_frame_op_default_axis(op_name) | ||
assert default_axis == "columns", default_axis # because we are not "special" | ||
|
||
doc = _flex_comp_doc_FRAME.format( | ||
op_name=op_name, desc=_op_descriptions[op_name]["desc"] | ||
|
@@ -772,6 +781,7 @@ def f(self, other, axis=default_axis, level=None): | |
|
||
|
||
def _comp_method_FRAME(cls, op, special): | ||
assert special # "special" also means "not flex" | ||
str_rep = _get_opstr(op) | ||
op_name = _get_op_name(op, special) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,13 +121,6 @@ def masked_arith_op(x: np.ndarray, y, op): | |
return result | ||
|
||
|
||
def define_na_arithmetic_op(op, str_rep: Optional[str]): | ||
def na_op(x, y): | ||
return na_arithmetic_op(x, y, op, str_rep) | ||
|
||
return na_op | ||
|
||
|
||
def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = False): | ||
""" | ||
Return the result of evaluating op on the passed in values. | ||
|
@@ -386,8 +379,13 @@ def get_array_op(op, str_rep: Optional[str] = None): | |
|
||
Returns | ||
------- | ||
function | ||
functools.partial | ||
""" | ||
if isinstance(op, partial): | ||
# We get here via dispatch_to_series in DataFrame case | ||
# TODO: avoid getting here | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is an example with which you get this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In _arith_method_FRAME we pass na_op (which is one a partial returned by this function here) to _combine_frame, which in turn passes na_op to dispatch_to_series, which then passes na_op back to this function |
||
return op | ||
|
||
op_name = op.__name__.strip("_") | ||
if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: | ||
return partial(comparison_op, op=op, str_rep=str_rep) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really understand this comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
im finding cases where left.columns and right.columns are both empty but different dtypes, which still satisfy
left.columns.equals(right.columns)
, but notright._indexed_same(left)
. looks like this is an artifact of the intersection done in_should_reindex_frame_op
, looking into this now. (also will fix the typo shaoe -> shape)