Skip to content

Commit 89f8af7

Browse files
authored
CLN: always dispatch-to-series (#34286)
* comments
* CLN: always dispatch-to-series
* More accurate assertions
* clarify comment, annotations
1 parent 7c46c68 commit 89f8af7

File tree

8 files changed

+42
-108
lines changed

8 files changed

+42
-108
lines changed

pandas/_libs/tslibs/offsets.pyx

+1-8
Original file line numberDiff line numberDiff line change
@@ -1299,7 +1299,7 @@ cdef class QuarterOffset(SingleConstructorOffset):
12991299
return type(dtindex)._simple_new(shifted, dtype=dtindex.dtype)
13001300

13011301

1302-
cdef class MonthOffset(BaseOffset):
1302+
cdef class MonthOffset(SingleConstructorOffset):
13031303
def is_on_offset(self, dt) -> bool:
13041304
if self.normalize and not is_normalized(dt):
13051305
return False
@@ -1316,13 +1316,6 @@ cdef class MonthOffset(BaseOffset):
13161316
shifted = shift_months(dtindex.asi8, self.n, self._day_opt)
13171317
return type(dtindex)._simple_new(shifted, dtype=dtindex.dtype)
13181318

1319-
@classmethod
1320-
def _from_name(cls, suffix=None):
1321-
# default _from_name calls cls with no args
1322-
if suffix:
1323-
raise ValueError(f"Bad freq suffix {suffix}")
1324-
return cls()
1325-
13261319

13271320
# ----------------------------------------------------------------------
13281321
# RelativeDelta Arithmetic

pandas/core/frame.py

+1-9
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,6 @@
135135
sanitize_index,
136136
to_arrays,
137137
)
138-
from pandas.core.ops.missing import dispatch_fill_zeros
139138
from pandas.core.series import Series
140139
from pandas.core.sorting import ensure_key_mapped
141140

@@ -5734,14 +5733,7 @@ def _arith_op(left, right):
57345733
left, right = ops.fill_binop(left, right, fill_value)
57355734
return func(left, right)
57365735

5737-
if ops.should_series_dispatch(self, other, func):
5738-
# iterate over columns
5739-
new_data = ops.dispatch_to_series(self, other, _arith_op)
5740-
else:
5741-
with np.errstate(all="ignore"):
5742-
res_values = _arith_op(self.values, other.values)
5743-
new_data = dispatch_fill_zeros(func, self.values, other.values, res_values)
5744-
5736+
new_data = ops.dispatch_to_series(self, other, _arith_op)
57455737
return new_data
57465738

57475739
def _construct_result(self, result) -> "DataFrame":

pandas/core/ops/__init__.py

+25-14
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
This is not a public API.
55
"""
66
import operator
7-
from typing import TYPE_CHECKING, Optional, Set
7+
from typing import TYPE_CHECKING, Optional, Set, Type
88

99
import numpy as np
1010

@@ -21,13 +21,11 @@
2121
from pandas.core.ops.array_ops import (
2222
arithmetic_op,
2323
comparison_op,
24-
define_na_arithmetic_op,
2524
get_array_op,
2625
logical_op,
2726
)
2827
from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401
2928
from pandas.core.ops.common import unpack_zerodim_and_defer
30-
from pandas.core.ops.dispatch import should_series_dispatch
3129
from pandas.core.ops.docstrings import (
3230
_arith_doc_FRAME,
3331
_flex_comp_doc_FRAME,
@@ -154,7 +152,7 @@ def _maybe_match_name(a, b):
154152
# -----------------------------------------------------------------------------
155153

156154

157-
def _get_frame_op_default_axis(name):
155+
def _get_frame_op_default_axis(name: str) -> Optional[str]:
158156
"""
159157
Only DataFrame cares about default_axis, specifically:
160158
special methods have default_axis=None and flex methods
@@ -277,7 +275,11 @@ def dispatch_to_series(left, right, func, axis=None):
277275
return type(left)(bm)
278276

279277
elif isinstance(right, ABCDataFrame):
280-
assert right._indexed_same(left)
278+
assert left.index.equals(right.index)
279+
assert left.columns.equals(right.columns)
280+
# TODO: The previous assertion `assert right._indexed_same(left)`
281+
# fails in cases with empty columns reached via
282+
# _frame_arith_method_with_reindex
281283

282284
array_op = get_array_op(func)
283285
bm = left._mgr.operate_blockwise(right._mgr, array_op)
@@ -345,6 +347,7 @@ def _arith_method_SERIES(cls, op, special):
345347
Wrapper function for Series arithmetic operations, to avoid
346348
code duplication.
347349
"""
350+
assert special # non-special uses _flex_method_SERIES
348351
op_name = _get_op_name(op, special)
349352

350353
@unpack_zerodim_and_defer(op_name)
@@ -368,6 +371,7 @@ def _comp_method_SERIES(cls, op, special):
368371
Wrapper function for Series comparison operations, to avoid
369372
code duplication.
370373
"""
374+
assert special # non-special uses _flex_method_SERIES
371375
op_name = _get_op_name(op, special)
372376

373377
@unpack_zerodim_and_defer(op_name)
@@ -394,6 +398,7 @@ def _bool_method_SERIES(cls, op, special):
394398
Wrapper function for Series logical operations, to avoid
395399
code duplication.
396400
"""
401+
assert special # non-special uses _flex_method_SERIES
397402
op_name = _get_op_name(op, special)
398403

399404
@unpack_zerodim_and_defer(op_name)
@@ -412,6 +417,7 @@ def wrapper(self, other):
412417

413418

414419
def _flex_method_SERIES(cls, op, special):
420+
assert not special # "special" also means "not flex"
415421
name = _get_op_name(op, special)
416422
doc = _make_flex_doc(name, "series")
417423

@@ -574,7 +580,7 @@ def to_series(right):
574580

575581

576582
def _should_reindex_frame_op(
577-
left: "DataFrame", right, op, axis, default_axis: int, fill_value, level
583+
left: "DataFrame", right, op, axis, default_axis, fill_value, level
578584
) -> bool:
579585
"""
580586
Check if this is an operation between DataFrames that will need to reindex.
@@ -629,11 +635,12 @@ def _frame_arith_method_with_reindex(
629635
return result.reindex(join_columns, axis=1)
630636

631637

632-
def _arith_method_FRAME(cls, op, special):
638+
def _arith_method_FRAME(cls: Type["DataFrame"], op, special: bool):
639+
# This is the only function where `special` can be either True or False
633640
op_name = _get_op_name(op, special)
634641
default_axis = _get_frame_op_default_axis(op_name)
635642

636-
na_op = define_na_arithmetic_op(op)
643+
na_op = get_array_op(op)
637644
is_logical = op.__name__.strip("_").lstrip("_") in ["and", "or", "xor"]
638645

639646
if op_name in _op_descriptions:
@@ -650,18 +657,19 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
650657
):
651658
return _frame_arith_method_with_reindex(self, other, op)
652659

660+
# TODO: why are we passing flex=True instead of flex=not special?
661+
# 15 tests fail if we pass flex=not special instead
653662
self, other = _align_method_FRAME(self, other, axis, flex=True, level=level)
654663

655664
if isinstance(other, ABCDataFrame):
656665
# Another DataFrame
657-
pass_op = op if should_series_dispatch(self, other, op) else na_op
658-
pass_op = pass_op if not is_logical else op
659-
660-
new_data = self._combine_frame(other, pass_op, fill_value)
666+
new_data = self._combine_frame(other, na_op, fill_value)
661667

662668
elif isinstance(other, ABCSeries):
663669
# For these values of `axis`, we end up dispatching to Series op,
664670
# so do not want the masked op.
671+
# TODO: the above comment is no longer accurate since we now
672+
# operate blockwise if other._values is an ndarray
665673
pass_op = op if axis in [0, "columns", None] else na_op
666674
pass_op = pass_op if not is_logical else op
667675

@@ -684,9 +692,11 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
684692
return f
685693

686694

687-
def _flex_comp_method_FRAME(cls, op, special):
695+
def _flex_comp_method_FRAME(cls: Type["DataFrame"], op, special: bool):
696+
assert not special # "special" also means "not flex"
688697
op_name = _get_op_name(op, special)
689698
default_axis = _get_frame_op_default_axis(op_name)
699+
assert default_axis == "columns", default_axis # because we are not "special"
690700

691701
doc = _flex_comp_doc_FRAME.format(
692702
op_name=op_name, desc=_op_descriptions[op_name]["desc"]
@@ -715,7 +725,8 @@ def f(self, other, axis=default_axis, level=None):
715725
return f
716726

717727

718-
def _comp_method_FRAME(cls, op, special):
728+
def _comp_method_FRAME(cls: Type["DataFrame"], op, special: bool):
729+
assert special # "special" also means "not flex"
719730
op_name = _get_op_name(op, special)
720731

721732
@Appender(f"Wrapper for comparison method {op_name}")

pandas/core/ops/array_ops.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,6 @@ def masked_arith_op(x: np.ndarray, y, op):
121121
return result
122122

123123

124-
def define_na_arithmetic_op(op):
125-
def na_op(x, y):
126-
return na_arithmetic_op(x, y, op)
127-
128-
return na_op
129-
130-
131124
def na_arithmetic_op(left, right, op, is_cmp: bool = False):
132125
"""
133126
Return the result of evaluating op on the passed in values.
@@ -378,8 +371,13 @@ def get_array_op(op):
378371
379372
Returns
380373
-------
381-
function
374+
functools.partial
382375
"""
376+
if isinstance(op, partial):
377+
# We get here via dispatch_to_series in DataFrame case
378+
# TODO: avoid getting here
379+
return op
380+
383381
op_name = op.__name__.strip("_")
384382
if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}:
385383
return partial(comparison_op, op=op)

pandas/core/ops/dispatch.py

-60
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,6 @@
55

66
from pandas._typing import ArrayLike
77

8-
from pandas.core.dtypes.common import (
9-
is_datetime64_dtype,
10-
is_integer_dtype,
11-
is_object_dtype,
12-
is_timedelta64_dtype,
13-
)
148
from pandas.core.dtypes.generic import ABCExtensionArray
159

1610

@@ -28,57 +22,3 @@ def should_extension_dispatch(left: ArrayLike, right: Any) -> bool:
2822
bool
2923
"""
3024
return isinstance(left, ABCExtensionArray) or isinstance(right, ABCExtensionArray)
31-
32-
33-
def should_series_dispatch(left, right, op):
34-
"""
35-
Identify cases where a DataFrame operation should dispatch to its
36-
Series counterpart.
37-
38-
Parameters
39-
----------
40-
left : DataFrame
41-
right : DataFrame or Series
42-
op : binary operator
43-
44-
Returns
45-
-------
46-
override : bool
47-
"""
48-
if left._is_mixed_type or right._is_mixed_type:
49-
return True
50-
51-
if op.__name__.strip("_") in ["and", "or", "xor", "rand", "ror", "rxor"]:
52-
# TODO: GH references for what this fixes
53-
# Note: this check must come before the check for nonempty columns.
54-
return True
55-
56-
if right.ndim == 1:
57-
# operating with Series, short-circuit checks that would fail
58-
# with AttributeError.
59-
return False
60-
61-
if not len(left.columns) or not len(right.columns):
62-
# ensure obj.dtypes[0] exists for each obj
63-
return False
64-
65-
ldtype = left.dtypes.iloc[0]
66-
rdtype = right.dtypes.iloc[0]
67-
68-
if (
69-
is_timedelta64_dtype(ldtype)
70-
and (is_integer_dtype(rdtype) or is_object_dtype(rdtype))
71-
) or (
72-
is_timedelta64_dtype(rdtype)
73-
and (is_integer_dtype(ldtype) or is_object_dtype(ldtype))
74-
):
75-
# numpy treats integer dtypes as timedelta64 dtypes in this scenario
76-
return True
77-
78-
if (is_datetime64_dtype(ldtype) and is_object_dtype(rdtype)) or (
79-
is_datetime64_dtype(rdtype) and is_object_dtype(ldtype)
80-
):
81-
# in particular case where one is an array of DateOffsets
82-
return True
83-
84-
return False

pandas/core/ops/methods.py

-1
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special):
207207
dict(
208208
and_=bool_method(cls, operator.and_, special),
209209
or_=bool_method(cls, operator.or_, special),
210-
# For some reason ``^`` wasn't used in original.
211210
xor=bool_method(cls, operator.xor, special),
212211
rand_=bool_method(cls, rand_, special),
213212
ror_=bool_method(cls, ror_, special),

pandas/tests/arithmetic/test_timedelta64.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,10 @@ def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture):
552552
obj = tm.box_expected(tdi, box)
553553
other = tm.box_expected(dti, box)
554554

555-
with tm.assert_produces_warning(PerformanceWarning):
555+
warn = None
556+
if box is not pd.DataFrame or tz_naive_fixture is None:
557+
warn = PerformanceWarning
558+
with tm.assert_produces_warning(warn):
556559
result = obj + other.astype(object)
557560
tm.assert_equal(result, other)
558561

pandas/tseries/offsets.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,7 @@ def __init__(
790790
# Month-Based Offset Classes
791791

792792

793-
class MonthEnd(SingleConstructorMixin, liboffsets.MonthOffset):
793+
class MonthEnd(liboffsets.MonthOffset):
794794
"""
795795
DateOffset of one month end.
796796
"""
@@ -799,7 +799,7 @@ class MonthEnd(SingleConstructorMixin, liboffsets.MonthOffset):
799799
_day_opt = "end"
800800

801801

802-
class MonthBegin(SingleConstructorMixin, liboffsets.MonthOffset):
802+
class MonthBegin(liboffsets.MonthOffset):
803803
"""
804804
DateOffset of one month at beginning.
805805
"""
@@ -808,7 +808,7 @@ class MonthBegin(SingleConstructorMixin, liboffsets.MonthOffset):
808808
_day_opt = "start"
809809

810810

811-
class BusinessMonthEnd(SingleConstructorMixin, liboffsets.MonthOffset):
811+
class BusinessMonthEnd(liboffsets.MonthOffset):
812812
"""
813813
DateOffset increments between business EOM dates.
814814
"""
@@ -817,7 +817,7 @@ class BusinessMonthEnd(SingleConstructorMixin, liboffsets.MonthOffset):
817817
_day_opt = "business_end"
818818

819819

820-
class BusinessMonthBegin(SingleConstructorMixin, liboffsets.MonthOffset):
820+
class BusinessMonthBegin(liboffsets.MonthOffset):
821821
"""
822822
DateOffset of one business month at beginning.
823823
"""
@@ -827,9 +827,7 @@ class BusinessMonthBegin(SingleConstructorMixin, liboffsets.MonthOffset):
827827

828828

829829
@doc(bound="bound")
830-
class _CustomBusinessMonth(
831-
CustomMixin, BusinessMixin, SingleConstructorMixin, liboffsets.MonthOffset
832-
):
830+
class _CustomBusinessMonth(CustomMixin, BusinessMixin, liboffsets.MonthOffset):
833831
"""
834832
DateOffset subclass representing custom business month(s).
835833

0 commit comments

Comments
 (0)