From f8d715d32e33bd5156a5750e838afdecaa6d0c13 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 5 Mar 2023 18:30:13 -0800 Subject: [PATCH 01/10] REF: move alignment functions to DataFrame/Series methods --- pandas/core/arrays/timedeltas.py | 6 +- pandas/core/computation/expressions.py | 2 +- pandas/core/frame.py | 241 ++++++++++++++++- pandas/core/generic.py | 3 +- pandas/core/ops/__init__.py | 284 +-------------------- pandas/core/ops/array_ops.py | 6 +- pandas/core/ops/methods.py | 2 +- pandas/core/ops/missing.py | 2 +- pandas/core/series.py | 23 +- pandas/tests/arithmetic/test_datetime64.py | 2 +- pandas/tests/frame/test_arithmetic.py | 6 +- pandas/tests/scalar/test_nat.py | 2 +- 12 files changed, 278 insertions(+), 301 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index d38145295a4db..329d6a937728a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -63,12 +63,14 @@ ) from pandas.core.dtypes.missing import isna -from pandas.core import nanops +from pandas.core import ( + nanops, + roperator, +) from pandas.core.array_algos import datetimelike_accumulations from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays._ranges import generate_regular_range import pandas.core.common as com -from pandas.core.ops import roperator from pandas.core.ops.common import unpack_zerodim_and_defer if TYPE_CHECKING: diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 10b0670a78d6f..6219cac4aeb16 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -17,8 +17,8 @@ from pandas.util._exceptions import find_stack_level +from pandas.core import roperator from pandas.core.computation.check import NUMEXPR_INSTALLED -from pandas.core.ops import roperator if NUMEXPR_INSTALLED: import numexpr as ne diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 98acab52e62f0..12fcb6c9ffb11 100644 --- 
a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -15,6 +15,7 @@ import functools from io import StringIO import itertools +import operator import sys from textwrap import dedent from typing import ( @@ -88,6 +89,7 @@ from pandas.core.dtypes.common import ( infer_dtype_from_object, is_1d_only_ea_dtype, + is_array_like, is_bool_dtype, is_dataclass, is_dict_like, @@ -116,6 +118,7 @@ common as com, nanops, ops, + roperator, ) from pandas.core.accessor import CachedAccessor from pandas.core.apply import ( @@ -7453,26 +7456,254 @@ class diet def _cmp_method(self, other, op): axis: Literal[1] = 1 # only relevant for Series other case - self, other = ops.align_method_FRAME(self, other, axis, flex=False, level=None) + self, other = self._align_for_op(other, axis, flex=False, level=None) # See GH#4537 for discussion of scalar op behavior new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data) def _arith_method(self, other, op): - if ops.should_reindex_frame_op(self, other, op, 1, None, None): - return ops.frame_arith_method_with_reindex(self, other, op) + if self._should_reindex_frame_op(other, op, 1, None, None): + return self._arith_method_with_reindex(other, op) axis: Literal[1] = 1 # only relevant for Series other case other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],)) - self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None) + self, other = self._align_for_op(other, axis, flex=True, level=None) new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data) _logical_method = _arith_method + def _arith_method_with_reindex(self, right: DataFrame, op) -> DataFrame: + """ + For DataFrame-with-DataFrame operations that require reindexing, + operate only on shared columns, then reindex. 
+ + Parameters + ---------- + right : DataFrame + op : binary operator + + Returns + ------- + DataFrame + """ + left = self + + # GH#31623, only operate on shared columns + cols, lcols, rcols = left.columns.join( + right.columns, how="inner", level=None, return_indexers=True + ) + + new_left = left.iloc[:, lcols] + new_right = right.iloc[:, rcols] + result = op(new_left, new_right) + + # Do the join on the columns instead of using left._align_for_op + # to avoid constructing two potentially large/sparse DataFrames + join_columns, _, _ = left.columns.join( + right.columns, how="outer", level=None, return_indexers=True + ) + + if result.columns.has_duplicates: + # Avoid reindexing with a duplicate axis. + # https://github.com/pandas-dev/pandas/issues/35194 + indexer, _ = result.columns.get_indexer_non_unique(join_columns) + indexer = algorithms.unique1d(indexer) + result = result._reindex_with_indexers( + {1: [join_columns, indexer]}, allow_dups=True + ) + else: + result = result.reindex(join_columns, axis=1) + + return result + + def _should_reindex_frame_op( + self: DataFrame, right, op, axis: int, fill_value, level + ) -> bool: + """ + Check if this is an operation between DataFrames that will need to reindex. + """ + if op is operator.pow or op is roperator.rpow: + # GH#32685 pow has special semantics for operating with null values + return False + + if not isinstance(right, DataFrame): + return False + + if fill_value is None and level is None and axis == 1: + # TODO: any other cases we should handle here? + + # Intersection is always unique so we have to check the unique columns + left_uniques = self.columns.unique() + right_uniques = right.columns.unique() + cols = left_uniques.intersection(right_uniques) + if len(cols) and not ( + len(cols) == len(left_uniques) and len(cols) == len(right_uniques) + ): + # TODO: is there a shortcut available when len(cols) == 0? 
+ return True + + return False + + def _align_for_op( + self, other, axis, flex: bool | None = False, level: Level = None + ): + """ + Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. + + Parameters + ---------- + left : DataFrame + right : Any + axis : int, str, or None + flex : bool or None, default False + Whether this is a flex op, in which case we reindex. + None indicates not to check for alignment. + level : int or level name, default None + + Returns + ------- + left : DataFrame + right : Any + """ + left, right = self, other + + def to_series(right): + msg = ( + "Unable to coerce to Series, " + "length must be {req_len}: given {given_len}" + ) + + # pass dtype to avoid doing inference, which would break consistency + # with Index/Series ops + dtype = None + if getattr(right, "dtype", None) == object: + # can't pass right.dtype unconditionally as that would break on e.g. + # datetime64[h] ndarray + dtype = object + + if axis is not None and left._get_axis_name(axis) == "index": + if len(left.index) != len(right): + raise ValueError( + msg.format(req_len=len(left.index), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.index, dtype=dtype) + else: + if len(left.columns) != len(right): + raise ValueError( + msg.format(req_len=len(left.columns), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.columns, dtype=dtype) + return right + + if isinstance(right, np.ndarray): + if right.ndim == 1: + right = to_series(right) + + elif right.ndim == 2: + # We need to pass dtype=right.dtype to retain object dtype + # otherwise we lose consistency with Index and array ops + dtype = None + if getattr(right, "dtype", None) == object: + # can't pass right.dtype unconditionally as that would break on e.g. 
+ # datetime64[h] ndarray + dtype = object + + if right.shape == left.shape: + right = left._constructor( + right, index=left.index, columns=left.columns, dtype=dtype + ) + + elif right.shape[0] == left.shape[0] and right.shape[1] == 1: + # Broadcast across columns + right = np.broadcast_to(right, left.shape) + right = left._constructor( + right, index=left.index, columns=left.columns, dtype=dtype + ) + + elif right.shape[1] == left.shape[1] and right.shape[0] == 1: + # Broadcast along rows + right = to_series(right[0, :]) + + else: + raise ValueError( + "Unable to coerce to DataFrame, shape " + f"must be {left.shape}: given {right.shape}" + ) + + elif right.ndim > 2: + raise ValueError( + "Unable to coerce to Series/DataFrame, " + f"dimension must be <= 2: {right.shape}" + ) + + elif is_list_like(right) and not isinstance(right, (Series, DataFrame)): + # GH#36702. Raise when attempting arithmetic with list of array-like. + if any(is_array_like(el) for el in right): + raise ValueError( + f"Unable to coerce list of {type(right[0])} to Series/DataFrame" + ) + # GH#17901 + right = to_series(right) + + if flex is not None and isinstance(right, DataFrame): + if not left._indexed_same(right): + if flex: + left, right = left.align( + right, join="outer", level=level, copy=False + ) + else: + raise ValueError( + "Can only compare identically-labeled (both index and columns) " + "DataFrame objects" + ) + elif isinstance(right, Series): + # axis=1 is default for DataFrame-with-Series op + axis = left._get_axis_number(axis) if axis is not None else 1 + + if not flex: + if not left.axes[axis].equals(right.index): + raise ValueError( + "Operands are not aligned. Do " + "`left, right = left.align(right, axis=1, copy=False)` " + "before operating." 
+ ) + + left, right = left.align( + right, join="outer", axis=axis, level=level, copy=False + ) + right = left._maybe_align_series_as_frame(right, axis) + + return left, right + + def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): + """ + If the Series operand is not EA-dtype, we can broadcast to 2D and operate + blockwise. + """ + rvalues = series._values + if not isinstance(rvalues, np.ndarray): + # TODO(EA2D): no need to special-case with 2D EAs + if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): + # We can losslessly+cheaply cast to ndarray + rvalues = np.asarray(rvalues) + else: + return series + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) + else: + rvalues = rvalues.reshape(1, -1) + + rvalues = np.broadcast_to(rvalues, self.shape) + # pass dtype to avoid doing inference + return type(self)( + rvalues, index=self.index, columns=self.columns, dtype=rvalues.dtype + ) + def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None): """ Evaluate the frame operation func(left, right) by evaluating @@ -7503,7 +7734,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None) assert self.columns.equals(right.columns) # TODO: The previous assertion `assert right._indexed_same(self)` # fails in cases with empty columns reached via - # _frame_arith_method_with_reindex + # _arith_method_with_reindex # TODO operate_blockwise expects a manager of the same type with np.errstate(all="ignore"): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 003e4cc5b8b23..f2e39b5c1d0fc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -169,7 +169,6 @@ clean_reindex_fill_method, find_valid_index, ) -from pandas.core.ops import align_method_FRAME from pandas.core.reshape.concat import concat from pandas.core.shared_docs import _shared_docs from pandas.core.sorting import get_indexer_indexer @@ -8069,7 +8068,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if 
isinstance(self, ABCSeries): threshold = self._constructor(threshold, index=self.index) else: - threshold = align_method_FRAME(self, threshold, axis, flex=None)[1] + threshold = self._align_for_op(threshold, axis, flex=None)[1] # GH 40420 # Treat missing thresholds as no bounds, not clipping the values diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 64619fdc4b8d4..f9cb489ff41f2 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,36 +5,20 @@ """ from __future__ import annotations -import operator -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import cast import numpy as np from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op -from pandas._typing import ( - Axis, - AxisInt, - Level, -) +from pandas._typing import Axis from pandas.util._decorators import Appender -from pandas.core.dtypes.common import ( - is_array_like, - is_list_like, -) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, ) from pandas.core.dtypes.missing import isna -from pandas.core import ( - algorithms, - roperator, -) from pandas.core.ops.array_ops import ( arithmetic_op, comp_method_OBJECT_ARRAY, @@ -74,12 +58,6 @@ rxor, ) -if TYPE_CHECKING: - from pandas import ( - DataFrame, - Series, - ) - # ----------------------------------------------------------------------------- # constants ARITHMETIC_BINOPS: set[str] = { @@ -154,25 +132,6 @@ def fill_binop(left, right, fill_value): # Series -def align_method_SERIES(left: Series, right, align_asobject: bool = False): - """align lhs and rhs Series""" - # ToDo: Different from align_method_FRAME, list, tuple and ndarray - # are not coerced here - # because Series has inconsistencies described in #13637 - - if isinstance(right, ABCSeries): - # avoid repeated alignment - if not left.index.equals(right.index): - if align_asobject: - # to keep original value's dtype for bool ops - left = left.astype(object) - right = right.astype(object) - - left, right 
= left.align(right, copy=False) - - return left, right - - def flex_method_SERIES(op): name = op.__name__.strip("_") doc = make_flex_doc(name, "series") @@ -208,233 +167,6 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis: Axis = 0): # DataFrame -def align_method_FRAME( - left, right, axis, flex: bool | None = False, level: Level = None -): - """ - Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. - - Parameters - ---------- - left : DataFrame - right : Any - axis : int, str, or None - flex : bool or None, default False - Whether this is a flex op, in which case we reindex. - None indicates not to check for alignment. - level : int or level name, default None - - Returns - ------- - left : DataFrame - right : Any - """ - - def to_series(right): - msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}" - - # pass dtype to avoid doing inference, which would break consistency - # with Index/Series ops - dtype = None - if getattr(right, "dtype", None) == object: - # can't pass right.dtype unconditionally as that would break on e.g. - # datetime64[h] ndarray - dtype = object - - if axis is not None and left._get_axis_name(axis) == "index": - if len(left.index) != len(right): - raise ValueError( - msg.format(req_len=len(left.index), given_len=len(right)) - ) - right = left._constructor_sliced(right, index=left.index, dtype=dtype) - else: - if len(left.columns) != len(right): - raise ValueError( - msg.format(req_len=len(left.columns), given_len=len(right)) - ) - right = left._constructor_sliced(right, index=left.columns, dtype=dtype) - return right - - if isinstance(right, np.ndarray): - if right.ndim == 1: - right = to_series(right) - - elif right.ndim == 2: - # We need to pass dtype=right.dtype to retain object dtype - # otherwise we lose consistency with Index and array ops - dtype = None - if getattr(right, "dtype", None) == object: - # can't pass right.dtype unconditionally as that would break on e.g. 
- # datetime64[h] ndarray - dtype = object - - if right.shape == left.shape: - right = left._constructor( - right, index=left.index, columns=left.columns, dtype=dtype - ) - - elif right.shape[0] == left.shape[0] and right.shape[1] == 1: - # Broadcast across columns - right = np.broadcast_to(right, left.shape) - right = left._constructor( - right, index=left.index, columns=left.columns, dtype=dtype - ) - - elif right.shape[1] == left.shape[1] and right.shape[0] == 1: - # Broadcast along rows - right = to_series(right[0, :]) - - else: - raise ValueError( - "Unable to coerce to DataFrame, shape " - f"must be {left.shape}: given {right.shape}" - ) - - elif right.ndim > 2: - raise ValueError( - "Unable to coerce to Series/DataFrame, " - f"dimension must be <= 2: {right.shape}" - ) - - elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)): - # GH 36702. Raise when attempting arithmetic with list of array-like. - if any(is_array_like(el) for el in right): - raise ValueError( - f"Unable to coerce list of {type(right[0])} to Series/DataFrame" - ) - # GH17901 - right = to_series(right) - - if flex is not None and isinstance(right, ABCDataFrame): - if not left._indexed_same(right): - if flex: - left, right = left.align(right, join="outer", level=level, copy=False) - else: - raise ValueError( - "Can only compare identically-labeled (both index and columns) " - "DataFrame objects" - ) - elif isinstance(right, ABCSeries): - # axis=1 is default for DataFrame-with-Series op - axis = left._get_axis_number(axis) if axis is not None else 1 - - if not flex: - if not left.axes[axis].equals(right.index): - raise ValueError( - "Operands are not aligned. Do " - "`left, right = left.align(right, axis=1, copy=False)` " - "before operating." 
- ) - - left, right = left.align( - right, join="outer", axis=axis, level=level, copy=False - ) - right = _maybe_align_series_as_frame(left, right, axis) - - return left, right - - -def should_reindex_frame_op( - left: DataFrame, right, op, axis: int, fill_value, level -) -> bool: - """ - Check if this is an operation between DataFrames that will need to reindex. - """ - assert isinstance(left, ABCDataFrame) - - if op is operator.pow or op is roperator.rpow: - # GH#32685 pow has special semantics for operating with null values - return False - - if not isinstance(right, ABCDataFrame): - return False - - if fill_value is None and level is None and axis == 1: - # TODO: any other cases we should handle here? - - # Intersection is always unique so we have to check the unique columns - left_uniques = left.columns.unique() - right_uniques = right.columns.unique() - cols = left_uniques.intersection(right_uniques) - if len(cols) and not ( - len(cols) == len(left_uniques) and len(cols) == len(right_uniques) - ): - # TODO: is there a shortcut available when len(cols) == 0? - return True - - return False - - -def frame_arith_method_with_reindex(left: DataFrame, right: DataFrame, op) -> DataFrame: - """ - For DataFrame-with-DataFrame operations that require reindexing, - operate only on shared columns, then reindex. 
- - Parameters - ---------- - left : DataFrame - right : DataFrame - op : binary operator - - Returns - ------- - DataFrame - """ - # GH#31623, only operate on shared columns - cols, lcols, rcols = left.columns.join( - right.columns, how="inner", level=None, return_indexers=True - ) - - new_left = left.iloc[:, lcols] - new_right = right.iloc[:, rcols] - result = op(new_left, new_right) - - # Do the join on the columns instead of using align_method_FRAME - # to avoid constructing two potentially large/sparse DataFrames - join_columns, _, _ = left.columns.join( - right.columns, how="outer", level=None, return_indexers=True - ) - - if result.columns.has_duplicates: - # Avoid reindexing with a duplicate axis. - # https://github.com/pandas-dev/pandas/issues/35194 - indexer, _ = result.columns.get_indexer_non_unique(join_columns) - indexer = algorithms.unique1d(indexer) - result = result._reindex_with_indexers( - {1: [join_columns, indexer]}, allow_dups=True - ) - else: - result = result.reindex(join_columns, axis=1) - - return result - - -def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: AxisInt): - """ - If the Series operand is not EA-dtype, we can broadcast to 2D and operate - blockwise. 
- """ - rvalues = series._values - if not isinstance(rvalues, np.ndarray): - # TODO(EA2D): no need to special-case with 2D EAs - if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): - # We can losslessly+cheaply cast to ndarray - rvalues = np.asarray(rvalues) - else: - return series - - if axis == 0: - rvalues = rvalues.reshape(-1, 1) - else: - rvalues = rvalues.reshape(1, -1) - - rvalues = np.broadcast_to(rvalues, frame.shape) - # pass dtype to avoid doing inference - return type(frame)( - rvalues, index=frame.index, columns=frame.columns, dtype=rvalues.dtype - ) - - def flex_arith_method_FRAME(op): op_name = op.__name__.strip("_") @@ -446,8 +178,8 @@ def f(self, other, axis: Axis = "columns", level=None, fill_value=None): axis = self._get_axis_number(axis) if axis is not None else 1 axis = cast(int, axis) - if should_reindex_frame_op(self, other, op, axis, fill_value, level): - return frame_arith_method_with_reindex(self, other, op) + if self._should_reindex_frame_op(other, op, axis, fill_value, level): + return self._arith_method_with_reindex(other, op) if isinstance(other, ABCSeries) and fill_value is not None: # TODO: We could allow this in cases where we end up going @@ -455,7 +187,7 @@ def f(self, other, axis: Axis = "columns", level=None, fill_value=None): raise NotImplementedError(f"fill_value {fill_value} not supported.") other = maybe_prepare_scalar_for_op(other, self.shape) - self, other = align_method_FRAME(self, other, axis, flex=True, level=level) + self, other = self._align_for_op(other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): # Another DataFrame @@ -488,7 +220,7 @@ def flex_comp_method_FRAME(op): def f(self, other, axis: Axis = "columns", level=None): axis = self._get_axis_number(axis) if axis is not None else 1 - self, other = align_method_FRAME(self, other, axis, flex=True, level=level) + self, other = self._align_for_op(other, axis, flex=True, level=level) new_data = self._dispatch_frame_op(other, op, axis=axis) 
return self._construct_result(new_data) @@ -500,8 +232,6 @@ def f(self, other, axis: Axis = "columns", level=None): __all__ = [ "add_flex_arithmetic_methods", - "align_method_FRAME", - "align_method_SERIES", "ARITHMETIC_BINOPS", "arithmetic_op", "COMPARISON_BINOPS", @@ -511,7 +241,6 @@ def f(self, other, axis: Axis = "columns", level=None): "flex_arith_method_FRAME", "flex_comp_method_FRAME", "flex_method_SERIES", - "frame_arith_method_with_reindex", "invalid_comparison", "kleene_and", "kleene_or", @@ -530,6 +259,5 @@ def f(self, other, axis: Axis = "columns", level=None): "rsub", "rtruediv", "rxor", - "should_reindex_frame_op", "unpack_zerodim_and_defer", ] diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index bc05e9a3d7c3f..dfffe77fe1b76 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -47,12 +47,10 @@ notna, ) +from pandas.core import roperator from pandas.core.computation import expressions from pandas.core.construction import ensure_wrapped_if_datetimelike -from pandas.core.ops import ( - missing, - roperator, -) +from pandas.core.ops import missing from pandas.core.ops.dispatch import should_extension_dispatch from pandas.core.ops.invalid import invalid_comparison diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index be7c1205305e4..dda20d2fe5adb 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -10,7 +10,7 @@ ABCSeries, ) -from pandas.core.ops import roperator +from pandas.core import roperator def _get_method_wrappers(cls): diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index 3ba611c3bd0df..0866c44d62ca0 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -33,7 +33,7 @@ is_scalar, ) -from pandas.core.ops import roperator +from pandas.core import roperator def _fill_zeros(result, x, y): diff --git a/pandas/core/series.py b/pandas/core/series.py index 95ee3f1af58f1..2d598fd60c542 100644 --- a/pandas/core/series.py 
+++ b/pandas/core/series.py @@ -6027,7 +6027,7 @@ def _cmp_method(self, other, op): def _logical_method(self, other, op): res_name = ops.get_op_result_name(self, other) - self, other = ops.align_method_SERIES(self, other, align_asobject=True) + self, other = self._align_for_op(other, align_asobject=True) lvalues = self._values rvalues = extract_array(other, extract_numpy=True, extract_range=True) @@ -6036,9 +6036,28 @@ def _logical_method(self, other, op): return self._construct_result(res_values, name=res_name) def _arith_method(self, other, op): - self, other = ops.align_method_SERIES(self, other) + self, other = self._align_for_op(other) return base.IndexOpsMixin._arith_method(self, other, op) + def _align_for_op(self, right, align_asobject: bool = False): + """align lhs and rhs Series""" + # TODO: Different from DataFrame._align_for_op, list, tuple and ndarray + # are not coerced here + # because Series has inconsistencies described in GH#13637 + left = self + + if isinstance(right, Series): + # avoid repeated alignment + if not left.index.equals(right.index): + if align_asobject: + # to keep original value's dtype for bool ops + left = left.astype(object) + right = right.astype(object) + + left, right = left.align(right, copy=False) + + return left, right + Series._add_numeric_operations() diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 8bbb0452e822f..6c11ae4b30437 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -34,7 +34,7 @@ date_range, ) import pandas._testing as tm -from pandas.core.ops import roperator +from pandas.core import roperator from pandas.tests.arithmetic.common import ( assert_cannot_add, assert_invalid_addsub_type, diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index bcc1ae0183b97..b581dfd8c44b0 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ 
b/pandas/tests/frame/test_arithmetic.py @@ -1800,7 +1800,7 @@ def test_alignment_non_pandas(self, val): columns = ["X", "Y", "Z"] df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) - align = pd.core.ops.align_method_FRAME + align = DataFrame._align_for_op expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index) tm.assert_frame_equal(align(df, val, "index")[1], expected) @@ -1816,7 +1816,7 @@ def test_alignment_non_pandas_length_mismatch(self, val): columns = ["X", "Y", "Z"] df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) - align = pd.core.ops.align_method_FRAME + align = DataFrame._align_for_op # length mismatch msg = "Unable to coerce to Series, length must be 3: given 2" with pytest.raises(ValueError, match=msg): @@ -1830,7 +1830,7 @@ def test_alignment_non_pandas_index_columns(self): columns = ["X", "Y", "Z"] df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) - align = pd.core.ops.align_method_FRAME + align = DataFrame._align_for_op val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) tm.assert_frame_equal( align(df, val, "index")[1], diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 9aae76aab66c8..4156eca6f42c0 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -27,12 +27,12 @@ offsets, ) import pandas._testing as tm +from pandas.core import roperator from pandas.core.arrays import ( DatetimeArray, PeriodArray, TimedeltaArray, ) -from pandas.core.ops import roperator @pytest.mark.parametrize( From 778412d17b4b417d46e4150c67b48f650b48f77a Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Mar 2023 11:47:45 -0800 Subject: [PATCH 02/10] REF: define flex arithmetic/comparison methods non-dynamically --- pandas/core/arrays/categorical.py | 2 +- pandas/core/arrays/masked.py | 2 +- pandas/core/arrays/numpy_.py | 2 +- pandas/core/arrays/sparse/array.py | 7 +- pandas/core/frame.py | 5 +- pandas/core/ops/__init__.py | 126 +------- 
pandas/core/ops/docstrings.py | 21 +- pandas/core/ops/methods.py | 442 +++++++++++++++++++++-------- pandas/core/series.py | 6 +- 9 files changed, 355 insertions(+), 258 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index dd48da9ab6c16..be6c8493963ea 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1349,7 +1349,7 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # for binary ops, use our custom dunder methods - result = ops.maybe_dispatch_ufunc_to_dunder_op( + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs ) if result is not NotImplemented: diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9b9cb3e29810d..0461b0f528878 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -525,7 +525,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): return NotImplemented # for binary ops, use our custom dunder methods - result = ops.maybe_dispatch_ufunc_to_dunder_op( + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs ) if result is not NotImplemented: diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 216dbede39a6a..4effe97f2f04f 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -142,7 +142,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # in PandasArray, since pandas' ExtensionArrays are 1-d. 
out = kwargs.get("out", ()) - result = ops.maybe_dispatch_ufunc_to_dunder_op( + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs ) if result is not NotImplemented: diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index fcebd17ace2d3..78153890745d7 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -78,10 +78,7 @@ notna, ) -from pandas.core import ( - arraylike, - ops, -) +from pandas.core import arraylike import pandas.core.algorithms as algos from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ExtensionArray @@ -1643,7 +1640,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): return NotImplemented # for binary ops, use our custom dunder methods - result = ops.maybe_dispatch_ufunc_to_dunder_op( + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs ) if result is not NotImplemented: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 12fcb6c9ffb11..3a175096ce5cc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -175,6 +175,7 @@ treat_as_nested, ) from pandas.core.methods import selectn +from pandas.core.ops.methods import FrameOps from pandas.core.reshape.melt import melt from pandas.core.series import Series from pandas.core.shared_docs import _shared_docs @@ -491,7 +492,7 @@ # DataFrame class -class DataFrame(NDFrame, OpsMixin): +class DataFrame(FrameOps, NDFrame, OpsMixin): """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. 
@@ -11844,8 +11845,6 @@ def mask( DataFrame._add_numeric_operations() -ops.add_flex_arithmetic_methods(DataFrame) - def _from_nested_dict(data) -> collections.defaultdict: new_data: collections.defaultdict = collections.defaultdict(dict) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index f9cb489ff41f2..26b1ceef3a0a8 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,18 +5,6 @@ """ from __future__ import annotations -from typing import cast - -import numpy as np - -from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op -from pandas._typing import Axis -from pandas.util._decorators import Appender - -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCSeries, -) from pandas.core.dtypes.missing import isna from pandas.core.ops.array_ops import ( @@ -31,18 +19,12 @@ get_op_result_name, unpack_zerodim_and_defer, ) -from pandas.core.ops.docstrings import ( - _flex_comp_doc_FRAME, - _op_descriptions, - make_flex_doc, -) from pandas.core.ops.invalid import invalid_comparison from pandas.core.ops.mask_ops import ( kleene_and, kleene_or, kleene_xor, ) -from pandas.core.ops.methods import add_flex_arithmetic_methods from pandas.core.roperator import ( radd, rand_, @@ -128,118 +110,13 @@ def fill_binop(left, right, fill_value): return left, right -# ----------------------------------------------------------------------------- -# Series - - -def flex_method_SERIES(op): - name = op.__name__.strip("_") - doc = make_flex_doc(name, "series") - - @Appender(doc) - def flex_wrapper(self, other, level=None, fill_value=None, axis: Axis = 0): - # validate axis - if axis is not None: - self._get_axis_number(axis) - - res_name = get_op_result_name(self, other) - - if isinstance(other, ABCSeries): - return self._binop(other, op, level=level, fill_value=fill_value) - elif isinstance(other, (np.ndarray, list, tuple)): - if len(other) != len(self): - raise ValueError("Lengths must be equal") - other = 
self._constructor(other, self.index) - result = self._binop(other, op, level=level, fill_value=fill_value) - result.name = res_name - return result - else: - if fill_value is not None: - self = self.fillna(fill_value) - - return op(self, other) - - flex_wrapper.__name__ = name - return flex_wrapper - - -# ----------------------------------------------------------------------------- -# DataFrame - - -def flex_arith_method_FRAME(op): - op_name = op.__name__.strip("_") - - na_op = get_array_op(op) - doc = make_flex_doc(op_name, "dataframe") - - @Appender(doc) - def f(self, other, axis: Axis = "columns", level=None, fill_value=None): - axis = self._get_axis_number(axis) if axis is not None else 1 - axis = cast(int, axis) - - if self._should_reindex_frame_op(other, op, axis, fill_value, level): - return self._arith_method_with_reindex(other, op) - - if isinstance(other, ABCSeries) and fill_value is not None: - # TODO: We could allow this in cases where we end up going - # through the DataFrame path - raise NotImplementedError(f"fill_value {fill_value} not supported.") - - other = maybe_prepare_scalar_for_op(other, self.shape) - self, other = self._align_for_op(other, axis, flex=True, level=level) - - if isinstance(other, ABCDataFrame): - # Another DataFrame - new_data = self._combine_frame(other, na_op, fill_value) - - elif isinstance(other, ABCSeries): - new_data = self._dispatch_frame_op(other, op, axis=axis) - else: - # in this case we always have `np.ndim(other) == 0` - if fill_value is not None: - self = self.fillna(fill_value) - - new_data = self._dispatch_frame_op(other, op) - - return self._construct_result(new_data) - - f.__name__ = op_name - - return f - - -def flex_comp_method_FRAME(op): - op_name = op.__name__.strip("_") - - doc = _flex_comp_doc_FRAME.format( - op_name=op_name, desc=_op_descriptions[op_name]["desc"] - ) - - @Appender(doc) - def f(self, other, axis: Axis = "columns", level=None): - axis = self._get_axis_number(axis) if axis is not None else 1 
- - self, other = self._align_for_op(other, axis, flex=True, level=level) - - new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) - - f.__name__ = op_name - - return f - - __all__ = [ - "add_flex_arithmetic_methods", "ARITHMETIC_BINOPS", "arithmetic_op", "COMPARISON_BINOPS", "comparison_op", "comp_method_OBJECT_ARRAY", "fill_binop", - "flex_arith_method_FRAME", - "flex_comp_method_FRAME", "flex_method_SERIES", "invalid_comparison", "kleene_and", @@ -260,4 +137,7 @@ def f(self, other, axis: Axis = "columns", level=None): "rtruediv", "rxor", "unpack_zerodim_and_defer", + "get_op_result_name", + "maybe_prepare_scalar_for_op", + "get_array_op", ] diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index cdf1c120719e9..37decdedeb90e 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -49,13 +49,20 @@ def make_flex_doc(op_name: str, typ: str) -> str: else: doc = doc_no_examples elif typ == "dataframe": - base_doc = _flex_doc_FRAME - doc = base_doc.format( - desc=op_desc["desc"], - op_name=op_name, - equiv=equiv, - reverse=op_desc["reverse"], - ) + if op_name in ["eq", "ne", "le", "lt", "ge", "gt"]: + base_doc = _flex_comp_doc_FRAME + doc = _flex_comp_doc_FRAME.format( + op_name=op_name, + desc=op_desc["desc"], + ) + else: + base_doc = _flex_doc_FRAME + doc = base_doc.format( + desc=op_desc["desc"], + op_name=op_name, + equiv=equiv, + reverse=op_desc["reverse"], + ) else: raise AssertionError("Invalid typ argument.") return doc diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index dda20d2fe5adb..82da3f94f8921 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -4,121 +4,337 @@ from __future__ import annotations import operator +from typing import cast -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCSeries, -) +import numpy as np + +from pandas._typing import Axis +from pandas.util._decorators import Appender from 
pandas.core import roperator +from pandas.core.ops.array_ops import maybe_prepare_scalar_for_op +from pandas.core.ops.common import get_op_result_name +from pandas.core.ops.docstrings import make_flex_doc + + +class FrameOps: + def _flex_arith_method( + self, other, op, *, axis: Axis = "columns", level=None, fill_value=None + ): + axis = self._get_axis_number(axis) if axis is not None else 1 + axis = cast(int, axis) + + if self._should_reindex_frame_op(other, op, axis, fill_value, level): + return self._arith_method_with_reindex(other, op) + + if isinstance(other, SeriesOps) and fill_value is not None: + # TODO: We could allow this in cases where we end up going + # through the DataFrame path + raise NotImplementedError(f"fill_value {fill_value} not supported.") + + other = maybe_prepare_scalar_for_op(other, self.shape) + self, other = self._align_for_op(other, axis, flex=True, level=level) + + if isinstance(other, FrameOps): + # Another DataFrame + new_data = self._combine_frame(other, op, fill_value) + + elif isinstance(other, SeriesOps): + new_data = self._dispatch_frame_op(other, op, axis=axis) + else: + # in this case we always have `np.ndim(other) == 0` + if fill_value is not None: + self = self.fillna(fill_value) + + new_data = self._dispatch_frame_op(other, op) + + return self._construct_result(new_data) + + def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): + axis = self._get_axis_number(axis) if axis is not None else 1 + + self, other = self._align_for_op(other, axis, flex=True, level=level) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + @Appender(make_flex_doc("eq", "dataframe")) + def eq(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) + + @Appender(make_flex_doc("ne", "dataframe")) + def ne(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.ne, 
axis=axis, level=level) + + @Appender(make_flex_doc("le", "dataframe")) + def le(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.le, axis=axis, level=level) + + @Appender(make_flex_doc("lt", "dataframe")) + def lt(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.lt, axis=axis, level=level) + + @Appender(make_flex_doc("ge", "dataframe")) + def ge(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.ge, axis=axis, level=level) + + @Appender(make_flex_doc("gt", "dataframe")) + def gt(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.gt, axis=axis, level=level) + + @Appender(make_flex_doc("add", "dataframe")) + def add(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.add, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("radd", "dataframe")) + def radd(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.radd, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("sub", "dataframe")) + def sub(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.sub, level=level, fill_value=fill_value, axis=axis + ) + + subtract = sub + + @Appender(make_flex_doc("rsub", "dataframe")) + def rsub(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rsub, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("mul", "dataframe")) + def mul(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.mul, level=level, fill_value=fill_value, axis=axis + ) + + multiply = mul + + @Appender(make_flex_doc("rmul", 
"dataframe")) + def rmul(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rmul, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("truediv", "dataframe")) + def truediv(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.truediv, level=level, fill_value=fill_value, axis=axis + ) + + div = truediv + divide = truediv + + @Appender(make_flex_doc("rtruediv", "dataframe")) + def rtruediv(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis + ) + + rdiv = rtruediv + + @Appender(make_flex_doc("floordiv", "dataframe")) + def floordiv(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.floordiv, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("rfloordiv", "dataframe")) + def rfloordiv(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("mod", "dataframe")) + def mod(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.mod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("rmod", "dataframe")) + def rmod(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rmod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("pow", "dataframe")) + def pow(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.pow, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("rpow", 
"dataframe")) + def rpow(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rpow, level=level, fill_value=fill_value, axis=axis + ) + + +class SeriesOps: + def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0): + if axis is not None: + self._get_axis_number(axis) + + res_name = get_op_result_name(self, other) + + if isinstance(other, SeriesOps): + return self._binop(other, op, level=level, fill_value=fill_value) + elif isinstance(other, (np.ndarray, list, tuple)): + if len(other) != len(self): + raise ValueError("Lengths must be equal") + other = self._constructor(other, self.index) + result = self._binop(other, op, level=level, fill_value=fill_value) + result.name = res_name + return result + else: + if fill_value is not None: + self = self.fillna(fill_value) + + return op(self, other) + + @Appender(make_flex_doc("eq", "series")) + def eq(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.eq, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("ne", "series")) + def ne(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.ne, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("le", "series")) + def le(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.le, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("lt", "series")) + def lt(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.lt, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("ge", "series")) + def ge(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.ge, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("gt", 
"series")) + def gt(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.gt, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("add", "series")) + def add(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.add, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("radd", "series")) + def radd(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.radd, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("sub", "series")) + def sub(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.sub, level=level, fill_value=fill_value, axis=axis + ) + + subtract = sub + + @Appender(make_flex_doc("rsub", "series")) + def rsub(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rsub, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("mul", "series")) + def mul(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.mul, level=level, fill_value=fill_value, axis=axis + ) + + multiply = mul + + @Appender(make_flex_doc("rmul", "series")) + def rmul(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rmul, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("truediv", "series")) + def truediv(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.truediv, level=level, fill_value=fill_value, axis=axis + ) + + div = truediv + divide = truediv + + @Appender(make_flex_doc("rtruediv", "series")) + def rtruediv(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rtruediv, level=level, 
fill_value=fill_value, axis=axis + ) + + rdiv = rtruediv + + @Appender(make_flex_doc("floordiv", "series")) + def floordiv(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.floordiv, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("rfloordiv", "series")) + def rfloordiv(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("mod", "series")) + def mod(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.mod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("rmod", "series")) + def rmod(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rmod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("pow", "series")) + def pow(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.pow, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(make_flex_doc("rpow", "series")) + def rpow(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rpow, level=level, fill_value=fill_value, axis=axis + ) + @Appender(make_flex_doc("divmod", "series")) + def divmod(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, divmod, level=level, fill_value=fill_value, axis=axis + ) -def _get_method_wrappers(cls): - """ - Find the appropriate operation-wrappers to use when defining flex/special - arithmetic, boolean, and comparison operations with the given class. 
- - Parameters - ---------- - cls : class - - Returns - ------- - arith_flex : function or None - comp_flex : function or None - """ - # TODO: make these non-runtime imports once the relevant functions - # are no longer in __init__ - from pandas.core.ops import ( - flex_arith_method_FRAME, - flex_comp_method_FRAME, - flex_method_SERIES, - ) - - if issubclass(cls, ABCSeries): - # Just Series - arith_flex = flex_method_SERIES - comp_flex = flex_method_SERIES - elif issubclass(cls, ABCDataFrame): - arith_flex = flex_arith_method_FRAME - comp_flex = flex_comp_method_FRAME - return arith_flex, comp_flex - - -def add_flex_arithmetic_methods(cls) -> None: - """ - Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) - to the class. - - Parameters - ---------- - cls : class - flex methods will be defined and pinned to this class - """ - flex_arith_method, flex_comp_method = _get_method_wrappers(cls) - new_methods = _create_methods(cls, flex_arith_method, flex_comp_method) - new_methods.update( - { - "multiply": new_methods["mul"], - "subtract": new_methods["sub"], - "divide": new_methods["div"], - } - ) - # opt out of bool flex methods for now - assert not any(kname in new_methods for kname in ("ror_", "rxor", "rand_")) - - _add_methods(cls, new_methods=new_methods) - - -def _create_methods(cls, arith_method, comp_method): - # creates actual flex methods based upon arithmetic, and comp method - # constructors. 
- - have_divmod = issubclass(cls, ABCSeries) - # divmod is available for Series - - new_methods = {} - - new_methods.update( - { - "add": arith_method(operator.add), - "radd": arith_method(roperator.radd), - "sub": arith_method(operator.sub), - "mul": arith_method(operator.mul), - "truediv": arith_method(operator.truediv), - "floordiv": arith_method(operator.floordiv), - "mod": arith_method(operator.mod), - "pow": arith_method(operator.pow), - "rmul": arith_method(roperator.rmul), - "rsub": arith_method(roperator.rsub), - "rtruediv": arith_method(roperator.rtruediv), - "rfloordiv": arith_method(roperator.rfloordiv), - "rpow": arith_method(roperator.rpow), - "rmod": arith_method(roperator.rmod), - } - ) - new_methods["div"] = new_methods["truediv"] - new_methods["rdiv"] = new_methods["rtruediv"] - if have_divmod: - # divmod doesn't have an op that is supported by numexpr - new_methods["divmod"] = arith_method(divmod) - new_methods["rdivmod"] = arith_method(roperator.rdivmod) - - new_methods.update( - { - "eq": comp_method(operator.eq), - "ne": comp_method(operator.ne), - "lt": comp_method(operator.lt), - "gt": comp_method(operator.gt), - "le": comp_method(operator.le), - "ge": comp_method(operator.ge), - } - ) - - new_methods = {k.strip("_"): v for k, v in new_methods.items()} - return new_methods - - -def _add_methods(cls, new_methods) -> None: - for name, method in new_methods.items(): - setattr(cls, name, method) + @Appender(make_flex_doc("rdivmod", "series")) + def rdivmod(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rdivmod, level=level, fill_value=fill_value, axis=axis + ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2d598fd60c542..0ded9b36c1bbd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -126,6 +126,7 @@ SingleBlockManager, ) from pandas.core.methods import selectn +from pandas.core.ops.methods import SeriesOps from pandas.core.shared_docs import 
_shared_docs from pandas.core.sorting import ( ensure_key_mapped, @@ -239,7 +240,7 @@ def wrapper(self): # definition in base class "NDFrame" # error: Definition of "min" in base class "IndexOpsMixin" is incompatible with # definition in base class "NDFrame" -class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] +class Series(SeriesOps, base.IndexOpsMixin, NDFrame): # type: ignore[misc] """ One-dimensional ndarray with axis labels (including time series). @@ -6060,6 +6061,3 @@ def _align_for_op(self, right, align_asobject: bool = False): Series._add_numeric_operations() - -# Add arithmetic! -ops.add_flex_arithmetic_methods(Series) From 0ee0a80b1d74f550ff0dc7d57b9f02048baf38c0 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Mar 2023 15:32:16 -0800 Subject: [PATCH 03/10] REF: Consolidate Frame/Series arithmetic helpers in ops.methods --- pandas/core/frame.py | 387 +------------------------ pandas/core/ops/methods.py | 570 ++++++++++++++++++++++++++++++++++++- pandas/core/series.py | 75 ----- 3 files changed, 562 insertions(+), 470 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3a175096ce5cc..da69c39069f4e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -15,7 +15,6 @@ import functools from io import StringIO import itertools -import operator import sys from textwrap import dedent from typing import ( @@ -89,7 +88,6 @@ from pandas.core.dtypes.common import ( infer_dtype_from_object, is_1d_only_ea_dtype, - is_array_like, is_bool_dtype, is_dataclass, is_dict_like, @@ -117,8 +115,6 @@ algorithms, common as com, nanops, - ops, - roperator, ) from pandas.core.accessor import CachedAccessor from pandas.core.apply import ( @@ -492,7 +488,8 @@ # DataFrame class -class DataFrame(FrameOps, NDFrame, OpsMixin): +# Cannot override writable attribute "_get_axis_number" with a final one +class DataFrame(NDFrame, FrameOps, OpsMixin): # type: ignore[misc] """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. 
@@ -7451,386 +7448,6 @@ class diet result.columns = result.columns.reorder_levels(order) return result - # ---------------------------------------------------------------------- - # Arithmetic Methods - - def _cmp_method(self, other, op): - axis: Literal[1] = 1 # only relevant for Series other case - - self, other = self._align_for_op(other, axis, flex=False, level=None) - - # See GH#4537 for discussion of scalar op behavior - new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) - - def _arith_method(self, other, op): - if self._should_reindex_frame_op(other, op, 1, None, None): - return self._arith_method_with_reindex(other, op) - - axis: Literal[1] = 1 # only relevant for Series other case - other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],)) - - self, other = self._align_for_op(other, axis, flex=True, level=None) - - new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) - - _logical_method = _arith_method - - def _arith_method_with_reindex(self, right: DataFrame, op) -> DataFrame: - """ - For DataFrame-with-DataFrame operations that require reindexing, - operate only on shared columns, then reindex. - - Parameters - ---------- - right : DataFrame - op : binary operator - - Returns - ------- - DataFrame - """ - left = self - - # GH#31623, only operate on shared columns - cols, lcols, rcols = left.columns.join( - right.columns, how="inner", level=None, return_indexers=True - ) - - new_left = left.iloc[:, lcols] - new_right = right.iloc[:, rcols] - result = op(new_left, new_right) - - # Do the join on the columns instead of using left._align_for_op - # to avoid constructing two potentially large/sparse DataFrames - join_columns, _, _ = left.columns.join( - right.columns, how="outer", level=None, return_indexers=True - ) - - if result.columns.has_duplicates: - # Avoid reindexing with a duplicate axis. 
- # https://github.com/pandas-dev/pandas/issues/35194 - indexer, _ = result.columns.get_indexer_non_unique(join_columns) - indexer = algorithms.unique1d(indexer) - result = result._reindex_with_indexers( - {1: [join_columns, indexer]}, allow_dups=True - ) - else: - result = result.reindex(join_columns, axis=1) - - return result - - def _should_reindex_frame_op( - self: DataFrame, right, op, axis: int, fill_value, level - ) -> bool: - """ - Check if this is an operation between DataFrames that will need to reindex. - """ - if op is operator.pow or op is roperator.rpow: - # GH#32685 pow has special semantics for operating with null values - return False - - if not isinstance(right, DataFrame): - return False - - if fill_value is None and level is None and axis == 1: - # TODO: any other cases we should handle here? - - # Intersection is always unique so we have to check the unique columns - left_uniques = self.columns.unique() - right_uniques = right.columns.unique() - cols = left_uniques.intersection(right_uniques) - if len(cols) and not ( - len(cols) == len(left_uniques) and len(cols) == len(right_uniques) - ): - # TODO: is there a shortcut available when len(cols) == 0? - return True - - return False - - def _align_for_op( - self, other, axis, flex: bool | None = False, level: Level = None - ): - """ - Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. - - Parameters - ---------- - left : DataFrame - right : Any - axis : int, str, or None - flex : bool or None, default False - Whether this is a flex op, in which case we reindex. - None indicates not to check for alignment. 
- level : int or level name, default None - - Returns - ------- - left : DataFrame - right : Any - """ - left, right = self, other - - def to_series(right): - msg = ( - "Unable to coerce to Series, " - "length must be {req_len}: given {given_len}" - ) - - # pass dtype to avoid doing inference, which would break consistency - # with Index/Series ops - dtype = None - if getattr(right, "dtype", None) == object: - # can't pass right.dtype unconditionally as that would break on e.g. - # datetime64[h] ndarray - dtype = object - - if axis is not None and left._get_axis_name(axis) == "index": - if len(left.index) != len(right): - raise ValueError( - msg.format(req_len=len(left.index), given_len=len(right)) - ) - right = left._constructor_sliced(right, index=left.index, dtype=dtype) - else: - if len(left.columns) != len(right): - raise ValueError( - msg.format(req_len=len(left.columns), given_len=len(right)) - ) - right = left._constructor_sliced(right, index=left.columns, dtype=dtype) - return right - - if isinstance(right, np.ndarray): - if right.ndim == 1: - right = to_series(right) - - elif right.ndim == 2: - # We need to pass dtype=right.dtype to retain object dtype - # otherwise we lose consistency with Index and array ops - dtype = None - if getattr(right, "dtype", None) == object: - # can't pass right.dtype unconditionally as that would break on e.g. 
- # datetime64[h] ndarray - dtype = object - - if right.shape == left.shape: - right = left._constructor( - right, index=left.index, columns=left.columns, dtype=dtype - ) - - elif right.shape[0] == left.shape[0] and right.shape[1] == 1: - # Broadcast across columns - right = np.broadcast_to(right, left.shape) - right = left._constructor( - right, index=left.index, columns=left.columns, dtype=dtype - ) - - elif right.shape[1] == left.shape[1] and right.shape[0] == 1: - # Broadcast along rows - right = to_series(right[0, :]) - - else: - raise ValueError( - "Unable to coerce to DataFrame, shape " - f"must be {left.shape}: given {right.shape}" - ) - - elif right.ndim > 2: - raise ValueError( - "Unable to coerce to Series/DataFrame, " - f"dimension must be <= 2: {right.shape}" - ) - - elif is_list_like(right) and not isinstance(right, (Series, DataFrame)): - # GH#36702. Raise when attempting arithmetic with list of array-like. - if any(is_array_like(el) for el in right): - raise ValueError( - f"Unable to coerce list of {type(right[0])} to Series/DataFrame" - ) - # GH#17901 - right = to_series(right) - - if flex is not None and isinstance(right, DataFrame): - if not left._indexed_same(right): - if flex: - left, right = left.align( - right, join="outer", level=level, copy=False - ) - else: - raise ValueError( - "Can only compare identically-labeled (both index and columns) " - "DataFrame objects" - ) - elif isinstance(right, Series): - # axis=1 is default for DataFrame-with-Series op - axis = left._get_axis_number(axis) if axis is not None else 1 - - if not flex: - if not left.axes[axis].equals(right.index): - raise ValueError( - "Operands are not aligned. Do " - "`left, right = left.align(right, axis=1, copy=False)` " - "before operating." 
- ) - - left, right = left.align( - right, join="outer", axis=axis, level=level, copy=False - ) - right = left._maybe_align_series_as_frame(right, axis) - - return left, right - - def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): - """ - If the Series operand is not EA-dtype, we can broadcast to 2D and operate - blockwise. - """ - rvalues = series._values - if not isinstance(rvalues, np.ndarray): - # TODO(EA2D): no need to special-case with 2D EAs - if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): - # We can losslessly+cheaply cast to ndarray - rvalues = np.asarray(rvalues) - else: - return series - - if axis == 0: - rvalues = rvalues.reshape(-1, 1) - else: - rvalues = rvalues.reshape(1, -1) - - rvalues = np.broadcast_to(rvalues, self.shape) - # pass dtype to avoid doing self - return type(self)( - rvalues, index=self.index, columns=self.columns, dtype=rvalues.dtype - ) - - def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None): - """ - Evaluate the frame operation func(left, right) by evaluating - column-by-column, dispatching to the Series implementation. - - Parameters - ---------- - right : scalar, Series, or DataFrame - func : arithmetic or comparison operator - axis : {None, 0, 1} - - Returns - ------- - DataFrame - """ - # Get the appropriate array-op to apply to each column/block's values. - array_op = ops.get_array_op(func) - - right = lib.item_from_zerodim(right) - if not is_list_like(right): - # i.e. 
scalar, faster than checking np.ndim(right) == 0 - with np.errstate(all="ignore"): - bm = self._mgr.apply(array_op, right=right) - return self._constructor(bm) - - elif isinstance(right, DataFrame): - assert self.index.equals(right.index) - assert self.columns.equals(right.columns) - # TODO: The previous assertion `assert right._indexed_same(self)` - # fails in cases with empty columns reached via - # _arith_method_with_reindex - - # TODO operate_blockwise expects a manager of the same type - with np.errstate(all="ignore"): - bm = self._mgr.operate_blockwise( - # error: Argument 1 to "operate_blockwise" of "ArrayManager" has - # incompatible type "Union[ArrayManager, BlockManager]"; expected - # "ArrayManager" - # error: Argument 1 to "operate_blockwise" of "BlockManager" has - # incompatible type "Union[ArrayManager, BlockManager]"; expected - # "BlockManager" - right._mgr, # type: ignore[arg-type] - array_op, - ) - return self._constructor(bm) - - elif isinstance(right, Series) and axis == 1: - # axis=1 means we want to operate row-by-row - assert right.index.equals(self.columns) - - right = right._values - # maybe_align_as_frame ensures we do not have an ndarray here - assert not isinstance(right, np.ndarray) - - with np.errstate(all="ignore"): - arrays = [ - array_op(_left, _right) - for _left, _right in zip(self._iter_column_arrays(), right) - ] - - elif isinstance(right, Series): - assert right.index.equals(self.index) # Handle other cases later - right = right._values - - with np.errstate(all="ignore"): - arrays = [array_op(left, right) for left in self._iter_column_arrays()] - - else: - # Remaining cases have less-obvious dispatch rules - raise NotImplementedError(right) - - return type(self)._from_arrays( - arrays, self.columns, self.index, verify_integrity=False - ) - - def _combine_frame(self, other: DataFrame, func, fill_value=None): - # at this point we have `self._indexed_same(other)` - - if fill_value is None: - # since _arith_op may be called in a 
loop, avoid function call - # overhead if possible by doing this check once - _arith_op = func - - else: - - def _arith_op(left, right): - # for the mixed_type case where we iterate over columns, - # _arith_op(left, right) is equivalent to - # left._binop(right, func, fill_value=fill_value) - left, right = ops.fill_binop(left, right, fill_value) - return func(left, right) - - new_data = self._dispatch_frame_op(other, _arith_op) - return new_data - - def _construct_result(self, result) -> DataFrame: - """ - Wrap the result of an arithmetic, comparison, or logical operation. - - Parameters - ---------- - result : DataFrame - - Returns - ------- - DataFrame - """ - out = self._constructor(result, copy=False).__finalize__(self) - # Pin columns instead of passing to constructor for compat with - # non-unique columns case - out.columns = self.columns - out.index = self.index - return out - - def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: - # Naive implementation, room for optimization - div = self // other - mod = self - div * other - return div, mod - - def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: - # Naive implementation, room for optimization - div = other // self - mod = other - div * self - return div, mod - # ---------------------------------------------------------------------- # Combination-Related diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 82da3f94f8921..e4c851fe6ace9 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -4,25 +4,478 @@ from __future__ import annotations import operator -from typing import cast +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Literal, + cast, +) import numpy as np -from pandas._typing import Axis +from pandas._libs import lib +from pandas._typing import ( + ArrayLike, + Axis, + AxisInt, + Level, +) from pandas.util._decorators import Appender -from pandas.core import roperator -from pandas.core.ops.array_ops import 
maybe_prepare_scalar_for_op +from pandas.core.dtypes.common import ( + is_array_like, + is_list_like, +) + +from pandas.core import ( + algorithms, + roperator, +) +from pandas.core.ops.array_ops import ( + get_array_op, + maybe_prepare_scalar_for_op, +) from pandas.core.ops.common import get_op_result_name from pandas.core.ops.docstrings import make_flex_doc +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + from pandas.core.internals import ( + ArrayManager, + BlockManager, + ) + class FrameOps: + _get_axis_number: Callable[[Any], int] + _mgr: BlockManager | ArrayManager + + def _cmp_method(self, other, op): + axis: Literal[1] = 1 # only relevant for Series other case + + self, other = self._align_for_op(other, axis, flex=False, level=None) + + # See GH#4537 for discussion of scalar op behavior + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + def _arith_method(self, other, op): + if self._should_reindex_frame_op(other, op, 1, None, None): + return self._arith_method_with_reindex(other, op) + + axis: Literal[1] = 1 # only relevant for Series other case + other = maybe_prepare_scalar_for_op( + other, + # error: "FrameOps" has no attribute "shape" + (self.shape[axis],), # type: ignore[attr-defined] + ) + + self, other = self._align_for_op(other, axis, flex=True, level=None) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + _logical_method = _arith_method + + def _arith_method_with_reindex(self, right: DataFrame, op) -> DataFrame: + """ + For DataFrame-with-DataFrame operations that require reindexing, + operate only on shared columns, then reindex. 
+ + Parameters + ---------- + right : DataFrame + op : binary operator + + Returns + ------- + DataFrame + """ + left = cast("DataFrame", self) + + # GH#31623, only operate on shared columns + cols, lcols, rcols = left.columns.join( + right.columns, how="inner", level=None, return_indexers=True + ) + + new_left = left.iloc[:, lcols] + new_right = right.iloc[:, rcols] + result = op(new_left, new_right) + + # Do the join on the columns instead of using left._align_for_op + # to avoid constructing two potentially large/sparse DataFrames + join_columns, _, _ = left.columns.join( + right.columns, how="outer", level=None, return_indexers=True + ) + + if result.columns.has_duplicates: + # Avoid reindexing with a duplicate axis. + # https://github.com/pandas-dev/pandas/issues/35194 + indexer, _ = result.columns.get_indexer_non_unique(join_columns) + indexer = algorithms.unique1d(indexer) + result = result._reindex_with_indexers( + {1: [join_columns, indexer]}, allow_dups=True + ) + else: + result = result.reindex(join_columns, axis=1) + + return result + + def _should_reindex_frame_op(self, right, op, axis: int, fill_value, level) -> bool: + """ + Check if this is an operation between DataFrames that will need to reindex. + """ + if op is operator.pow or op is roperator.rpow: + # GH#32685 pow has special semantics for operating with null values + return False + + if not isinstance(right, FrameOps): + return False + + if fill_value is None and level is None and axis == 1: + # TODO: any other cases we should handle here? 
+ + # Intersection is always unique so we have to check the unique columns + # error: "FrameOps" has no attribute "columns" + left_uniques = self.columns.unique() # type: ignore[attr-defined] + # error: "FrameOps" has no attribute "columns" + right_uniques = right.columns.unique() # type: ignore[attr-defined] + cols = left_uniques.intersection(right_uniques) + if len(cols) and not ( + len(cols) == len(left_uniques) and len(cols) == len(right_uniques) + ): + # TODO: is there a shortcut available when len(cols) == 0? + return True + + return False + + def _align_for_op( + self, other, axis, flex: bool | None = False, level: Level = None + ): + """ + Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. + + Parameters + ---------- + left : DataFrame + right : Any + axis : int, str, or None + flex : bool or None, default False + Whether this is a flex op, in which case we reindex. + None indicates not to check for alignment. + level : int or level name, default None + + Returns + ------- + left : DataFrame + right : Any + """ + self = cast("DataFrame", self) + left, right = self, other + + def to_series(right): + msg = ( + "Unable to coerce to Series, " + "length must be {req_len}: given {given_len}" + ) + + # pass dtype to avoid doing inference, which would break consistency + # with Index/Series ops + dtype = None + if getattr(right, "dtype", None) == object: + # can't pass right.dtype unconditionally as that would break on e.g. 
+ # datetime64[h] ndarray + dtype = object + + if axis is not None and left._get_axis_number(axis) == 0: + if len(left.index) != len(right): + raise ValueError( + msg.format(req_len=len(left.index), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.index, dtype=dtype) + else: + if len(left.columns) != len(right): + raise ValueError( + msg.format(req_len=len(left.columns), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.columns, dtype=dtype) + return right + + if isinstance(right, np.ndarray): + if right.ndim == 1: + right = to_series(right) + + elif right.ndim == 2: + # We need to pass dtype=right.dtype to retain object dtype + # otherwise we lose consistency with Index and array ops + dtype = None + if right.dtype == object: + # can't pass right.dtype unconditionally as that would break on e.g. + # datetime64[h] ndarray + dtype = object + + if right.shape == left.shape: + right = left._constructor( + right, index=left.index, columns=left.columns, dtype=dtype + ) + + elif right.shape[0] == left.shape[0] and right.shape[1] == 1: + # Broadcast across columns + right = np.broadcast_to(right, left.shape) + right = left._constructor( + right, index=left.index, columns=left.columns, dtype=dtype + ) + + elif right.shape[1] == left.shape[1] and right.shape[0] == 1: + # Broadcast along rows + right = to_series(right[0, :]) + + else: + raise ValueError( + "Unable to coerce to DataFrame, shape " + f"must be {left.shape}: given {right.shape}" + ) + + elif right.ndim > 2: + raise ValueError( + "Unable to coerce to Series/DataFrame, " + f"dimension must be <= 2: {right.shape}" + ) + + elif is_list_like(right) and not isinstance(right, (SeriesOps, FrameOps)): + # GH#36702. Raise when attempting arithmetic with list of array-like. 
+ if any(is_array_like(el) for el in right): + raise ValueError( + f"Unable to coerce list of {type(right[0])} to Series/DataFrame" + ) + # GH#17901 + right = to_series(right) + + if flex is not None and isinstance(right, FrameOps): + rframe = cast("DataFrame", right) + if not left._indexed_same(rframe): + if flex: + left, right = left.align( + rframe, join="outer", level=level, copy=False + ) + else: + raise ValueError( + "Can only compare identically-labeled (both index and columns) " + "DataFrame objects" + ) + elif isinstance(right, SeriesOps): + right = cast("Series", right) + + # axis=1 is default for DataFrame-with-Series op + axis = left._get_axis_number(axis) if axis is not None else 1 + + if not flex: + if not left.axes[axis].equals(right.index): + raise ValueError( + "Operands are not aligned. Do " + "`left, right = left.align(right, axis=1, copy=False)` " + "before operating." + ) + + left, right = left.align( + # error: Argument 1 to "align" of "DataFrame" has incompatible + # type "Series"; expected "DataFrame" + right, # type: ignore[arg-type] + join="outer", + axis=axis, + level=level, + copy=False, + ) + right = left._maybe_align_series_as_frame(right, axis) + + return left, right + + def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): + """ + If the Series operand is not EA-dtype, we can broadcast to 2D and operate + blockwise. 
+ """ + rvalues = series._values + if not isinstance(rvalues, np.ndarray): + # TODO(EA2D): no need to special-case with 2D EAs + if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): + # We can losslessly+cheaply cast to ndarray + rvalues = np.asarray(rvalues) + else: + return series + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) + else: + rvalues = rvalues.reshape(1, -1) + + # error: "FrameOps" has no attribute "shape" + rvalues = np.broadcast_to(rvalues, self.shape) # type: ignore[attr-defined] + # pass dtype to avoid doing inference + # error: "FrameOps" has no attribute "_constructor" + return self._constructor( # type: ignore[attr-defined] + rvalues, + # error: "FrameOps" has no attribute "index" + index=self.index, # type: ignore[attr-defined] + # error: "FrameOps" has no attribute "columns" + columns=self.columns, # type: ignore[attr-defined] + dtype=rvalues.dtype, + ) + + def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None): + """ + Evaluate the frame operation func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. + + Parameters + ---------- + right : scalar, Series, or DataFrame + func : arithmetic or comparison operator + axis : {None, 0, 1} + + Returns + ------- + DataFrame + """ + # Get the appropriate array-op to apply to each column/block's values. + array_op = get_array_op(func) + + right = lib.item_from_zerodim(right) + if not is_list_like(right): + # i.e. 
scalar, faster than checking np.ndim(right) == 0 + with np.errstate(all="ignore"): + bm = self._mgr.apply(array_op, right=right) + # error: "FrameOps" has no attribute "_constructor" + return self._constructor(bm) # type: ignore[attr-defined] + + elif isinstance(right, FrameOps): + # error: "FrameOps" has no attribute "index" + assert self.index.equals(right.index) # type: ignore[attr-defined] + # error: "FrameOps" has no attribute "columns" + assert self.columns.equals(right.columns) # type: ignore[attr-defined] + # TODO: The previous assertion `assert right._indexed_same(self)` + # fails in cases with empty columns reached via + # _arith_method_with_reindex + + # TODO operate_blockwise expects a manager of the same type + with np.errstate(all="ignore"): + bm = self._mgr.operate_blockwise( + # error: Argument 1 to "operate_blockwise" of "ArrayManager" has + # incompatible type "Union[ArrayManager, BlockManager]"; expected + # "ArrayManager" + # error: Argument 1 to "operate_blockwise" of "BlockManager" has + # incompatible type "Union[ArrayManager, BlockManager]"; expected + # "BlockManager" + right._mgr, # type: ignore[arg-type] + array_op, + ) + # error: "FrameOps" has no attribute "_constructor" + return self._constructor(bm) # type: ignore[attr-defined] + + elif isinstance(right, SeriesOps) and axis == 1: + # axis=1 means we want to operate row-by-row + # error: "FrameOps" has no attribute "columns" + assert right.index.equals(self.columns) # type: ignore[attr-defined] + + right = right._values + # maybe_align_as_frame ensures we do not have an ndarray here + assert not isinstance(right, np.ndarray) + + # error: "FrameOps" has no attribute "_iter_column_arrays" + col_arrays = self._iter_column_arrays() # type: ignore[attr-defined] + with np.errstate(all="ignore"): + arrays = [ + array_op(_left, _right) for _left, _right in zip(col_arrays, right) + ] + + elif isinstance(right, SeriesOps): + # error: "FrameOps" has no attribute "index" + assert 
right.index.equals(self.index) # type: ignore[attr-defined] + right = right._values + + # error: "FrameOps" has no attribute "_iter_column_arrays" + col_arrays = self._iter_column_arrays() # type: ignore[attr-defined] + with np.errstate(all="ignore"): + arrays = [array_op(left, right) for left in col_arrays] + + else: + raise NotImplementedError(right) + + # error: "Type[FrameOps]" has no attribute "_from_arrays" + return type(self)._from_arrays( # type: ignore[attr-defined] + arrays, + # error: "FrameOps" has no attribute "columns" + self.columns, # type: ignore[attr-defined] + # error: "FrameOps" has no attribute "index" + self.index, # type: ignore[attr-defined] + verify_integrity=False, + ) + + def _combine_frame(self, other: FrameOps, func, fill_value=None): + # at this point we have `self._indexed_same(other)` + + if fill_value is None: + # since _arith_op may be called in a loop, avoid function call + # overhead if possible by doing this check once + _arith_op = func + + else: + from pandas.core.ops import fill_binop + + def _arith_op(left, right): + # for the mixed_type case where we iterate over columns, + # _arith_op(left, right) is equivalent to + # left._binop(right, func, fill_value=fill_value) + left, right = fill_binop(left, right, fill_value) + return func(left, right) + + new_data = self._dispatch_frame_op(other, _arith_op) + return new_data + + def _construct_result(self, result) -> DataFrame: + """ + Wrap the result of an arithmetic, comparison, or logical operation. 
+ + Parameters + ---------- + result : DataFrame + + Returns + ------- + DataFrame + """ + # error: "FrameOps" has no attribute "_constructor" + out = self._constructor(result, copy=False) # type: ignore[attr-defined] + out = out.__finalize__(self) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + # error: "FrameOps" has no attribute "columns" + out.columns = self.columns # type: ignore[attr-defined] + # error: "FrameOps" has no attribute "index" + out.index = self.index # type: ignore[attr-defined] + return out + + def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: + # Naive implementation, room for optimization + div = self // other + mod = self - div * other + return div, mod + + def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: + # Naive implementation, room for optimization + div = other // self + mod = other - div * self + return div, mod + def _flex_arith_method( self, other, op, *, axis: Axis = "columns", level=None, fill_value=None ): axis = self._get_axis_number(axis) if axis is not None else 1 - axis = cast(int, axis) if self._should_reindex_frame_op(other, op, axis, fill_value, level): return self._arith_method_with_reindex(other, op) @@ -32,7 +485,11 @@ def _flex_arith_method( # through the DataFrame path raise NotImplementedError(f"fill_value {fill_value} not supported.") - other = maybe_prepare_scalar_for_op(other, self.shape) + other = maybe_prepare_scalar_for_op( + other, + # error: "FrameOps" has no attribute "shape" + self.shape, # type: ignore[attr-defined] + ) self, other = self._align_for_op(other, axis, flex=True, level=level) if isinstance(other, FrameOps): @@ -44,7 +501,8 @@ def _flex_arith_method( else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: - self = self.fillna(fill_value) + # error: "FrameOps" has no attribute "fillna" + self = self.fillna(fill_value) # type: ignore[attr-defined] new_data = self._dispatch_frame_op(other, op) @@ 
-177,6 +635,94 @@ def rpow(self, other, axis: Axis = "columns", level=None, fill_value=None): class SeriesOps: + _get_axis_number: Callable[[Any], int] + _values: ArrayLike + + def _binop(self, other: SeriesOps, func, level=None, fill_value=None): + """ + Perform generic binary operation with optional fill value. + + Parameters + ---------- + other : Series + func : binary operator + fill_value : float or object + Value to substitute for NA/null values. If both Series are NA in a + location, the result will be NA regardless of the passed fill value. + level : int or level name, default None + Broadcast across a level, matching Index values on the + passed MultiIndex level. + + Returns + ------- + Series + """ + if not isinstance(other, SeriesOps): + raise AssertionError("Other operand must be Series") + + this = self + + # error: "SeriesOps" has no attribute "index" + if not self.index.equals(other.index): # type: ignore[attr-defined] + # error: "SeriesOps" has no attribute "align" + this, other = self.align( # type: ignore[attr-defined] + other, level=level, join="outer", copy=False + ) + + from pandas.core.ops import fill_binop + + this_vals, other_vals = fill_binop(this._values, other._values, fill_value) + + with np.errstate(all="ignore"): + result = func(this_vals, other_vals) + + name = get_op_result_name(self, other) + return this._construct_result(result, name) + + def _construct_result( + self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable + ) -> Series | tuple[Series, Series]: + """ + Construct an appropriately-labelled Series from the result of an op. + + Parameters + ---------- + result : ndarray or ExtensionArray + name : Label + + Returns + ------- + Series + In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. 
+ """ + if isinstance(result, tuple): + # produced by divmod or rdivmod + + res1 = self._construct_result(result[0], name=name) + res2 = self._construct_result(result[1], name=name) + + # GH#33427 assertions to keep mypy happy + assert isinstance(res1, SeriesOps) + assert isinstance(res2, SeriesOps) + return (res1, res2) + + # TODO: result should always be ArrayLike, but this fails for some + # JSONArray tests + dtype = getattr(result, "dtype", None) + # error: "SeriesOps" has no attribute "_constructor" + out = self._constructor( # type: ignore[attr-defined] + result, + # error: "SeriesOps" has no attribute "index" + index=self.index, # type: ignore[attr-defined] + dtype=dtype, + ) + out = out.__finalize__(self) + + # Set the result's name after __finalize__ is called because __finalize__ + # would set it back to self.name + out.name = name + return out + def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0): if axis is not None: self._get_axis_number(axis) @@ -186,15 +732,19 @@ def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0 if isinstance(other, SeriesOps): return self._binop(other, op, level=level, fill_value=fill_value) elif isinstance(other, (np.ndarray, list, tuple)): - if len(other) != len(self): + # error: Argument 1 to "len" has incompatible type "SeriesOps"; + # expected "Sized" + if len(other) != len(self): # type: ignore[arg-type] raise ValueError("Lengths must be equal") - other = self._constructor(other, self.index) + # error: "SeriesOps" has no attribute "index" + other = self._constructor(other, self.index) # type: ignore[attr-defined] result = self._binop(other, op, level=level, fill_value=fill_value) result.name = res_name return result else: if fill_value is not None: - self = self.fillna(fill_value) + # error: "SeriesOps" has no attribute "fillna" + self = self.fillna(fill_value) # type: ignore[attr-defined] return op(self, other) diff --git a/pandas/core/series.py 
b/pandas/core/series.py index 0ded9b36c1bbd..2d66654e8ef5c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -358,8 +358,6 @@ class Series(SeriesOps, base.IndexOpsMixin, NDFrame): # type: ignore[misc] doc=base.IndexOpsMixin.hasnans.__doc__, ) _mgr: SingleManager - div: Callable[[Series, Any], Series] - rdiv: Callable[[Series, Any], Series] # ---------------------------------------------------------------------- # Constructors @@ -2968,79 +2966,6 @@ def _append( to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity ) - def _binop(self, other: Series, func, level=None, fill_value=None): - """ - Perform generic binary operation with optional fill value. - - Parameters - ---------- - other : Series - func : binary operator - fill_value : float or object - Value to substitute for NA/null values. If both Series are NA in a - location, the result will be NA regardless of the passed fill value. - level : int or level name, default None - Broadcast across a level, matching Index values on the - passed MultiIndex level. - - Returns - ------- - Series - """ - if not isinstance(other, Series): - raise AssertionError("Other operand must be Series") - - this = self - - if not self.index.equals(other.index): - this, other = self.align(other, level=level, join="outer", copy=False) - - this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value) - - with np.errstate(all="ignore"): - result = func(this_vals, other_vals) - - name = ops.get_op_result_name(self, other) - return this._construct_result(result, name) - - def _construct_result( - self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable - ) -> Series | tuple[Series, Series]: - """ - Construct an appropriately-labelled Series from the result of an op. - - Parameters - ---------- - result : ndarray or ExtensionArray - name : Label - - Returns - ------- - Series - In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. 
- """ - if isinstance(result, tuple): - # produced by divmod or rdivmod - - res1 = self._construct_result(result[0], name=name) - res2 = self._construct_result(result[1], name=name) - - # GH#33427 assertions to keep mypy happy - assert isinstance(res1, Series) - assert isinstance(res2, Series) - return (res1, res2) - - # TODO: result should always be ArrayLike, but this fails for some - # JSONArray tests - dtype = getattr(result, "dtype", None) - out = self._constructor(result, index=self.index, dtype=dtype) - out = out.__finalize__(self) - - # Set the result's name after __finalize__ is called because __finalize__ - # would set it back to self.name - out.name = name - return out - @doc( _shared_docs["compare"], """ From c00e8b81b22079217794f0c1d4232141f45de3d5 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Mar 2023 16:46:04 -0800 Subject: [PATCH 04/10] REF: move Series arithmetic helpers --- pandas/core/ops/methods.py | 61 ++++++++++++++++++++++++++++++++++++++ pandas/core/series.py | 55 +--------------------------------- 2 files changed, 62 insertions(+), 54 deletions(-) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index e4c851fe6ace9..11fb176eeb689 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -33,8 +33,11 @@ algorithms, roperator, ) +from pandas.core.construction import extract_array from pandas.core.ops.array_ops import ( + comparison_op, get_array_op, + logical_op, maybe_prepare_scalar_for_op, ) from pandas.core.ops.common import get_op_result_name @@ -638,6 +641,64 @@ class SeriesOps: _get_axis_number: Callable[[Any], int] _values: ArrayLike + def _cmp_method(self, other, op): + res_name = get_op_result_name(self, other) + + if isinstance(other, SeriesOps): + # error: "SeriesOps" has no attribute "_indexed_same" + if not self._indexed_same(other): # type: ignore[attr-defined] + raise ValueError("Can only compare identically-labeled Series objects") + + lvalues = self._values + rvalues = 
extract_array(other, extract_numpy=True, extract_range=True) + + with np.errstate(all="ignore"): + res_values = comparison_op(lvalues, rvalues, op) + + return self._construct_result(res_values, name=res_name) + + def _logical_method(self, other, op): + res_name = get_op_result_name(self, other) + self, other = self._align_for_op(other, align_asobject=True) + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + + res_values = logical_op(lvalues, rvalues, op) + return self._construct_result(res_values, name=res_name) + + def _arith_method(self, other, op): + self, other = self._align_for_op(other) + + # use IndexOpsMixin._arith_method + # error: "_arith_method" undefined in superclass + return super()._arith_method(other, op) # type: ignore[misc] + + def _align_for_op(self, right, align_asobject: bool = False): + """align lhs and rhs Series""" + # TODO: Different from DataFrame._align_for_op, list, tuple and ndarray + # are not coerced here + # because Series has inconsistencies described in GH#13637 + left = self + + if isinstance(right, SeriesOps): + # avoid repeated alignment + # error: "SeriesOps" has no attribute "index" + if not left.index.equals(right.index): # type: ignore[attr-defined] + if align_asobject: + # to keep original value's dtype for bool ops + # error: "SeriesOps" has no attribute "astype" + left = left.astype(object) # type: ignore[attr-defined] + # error: "SeriesOps" has no attribute "astype" + right = right.astype(object) # type: ignore[attr-defined] + + # error: "SeriesOps" has no attribute "align" + left, right = left.align( # type: ignore[attr-defined] + right, copy=False + ) + + return left, right + def _binop(self, other: SeriesOps, func, level=None, fill_value=None): """ Perform generic binary operation with optional fill value. 
diff --git a/pandas/core/series.py b/pandas/core/series.py index 2d66654e8ef5c..a2eb978245462 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -97,10 +97,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.arrays.categorical import CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor -from pandas.core.construction import ( - extract_array, - sanitize_array, -) +from pandas.core.construction import sanitize_array from pandas.core.generic import NDFrame from pandas.core.indexers import ( disallow_ndim_indexing, @@ -5934,55 +5931,5 @@ def mask( # Add plotting methods to Series hist = pandas.plotting.hist_series - # ---------------------------------------------------------------------- - # Template-Based Arithmetic/Comparison Methods - - def _cmp_method(self, other, op): - res_name = ops.get_op_result_name(self, other) - - if isinstance(other, Series) and not self._indexed_same(other): - raise ValueError("Can only compare identically-labeled Series objects") - - lvalues = self._values - rvalues = extract_array(other, extract_numpy=True, extract_range=True) - - with np.errstate(all="ignore"): - res_values = ops.comparison_op(lvalues, rvalues, op) - - return self._construct_result(res_values, name=res_name) - - def _logical_method(self, other, op): - res_name = ops.get_op_result_name(self, other) - self, other = self._align_for_op(other, align_asobject=True) - - lvalues = self._values - rvalues = extract_array(other, extract_numpy=True, extract_range=True) - - res_values = ops.logical_op(lvalues, rvalues, op) - return self._construct_result(res_values, name=res_name) - - def _arith_method(self, other, op): - self, other = self._align_for_op(other) - return base.IndexOpsMixin._arith_method(self, other, op) - - def _align_for_op(self, right, align_asobject: bool = False): - """align lhs and rhs Series""" - # TODO: Different from DataFrame._align_for_op, list, tuple and ndarray - # are not coerced here - # because Series 
has inconsistencies described in GH#13637 - left = self - - if isinstance(right, Series): - # avoid repeated alignment - if not left.index.equals(right.index): - if align_asobject: - # to keep original value's dtype for bool ops - left = left.astype(object) - right = right.astype(object) - - left, right = left.align(right, copy=False) - - return left, right - Series._add_numeric_operations() From 8cbcdce759912976ea9e48ee4c081943ec45b827 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Mar 2023 17:29:26 -0800 Subject: [PATCH 05/10] move fill_binop --- pandas/core/ops/__init__.py | 49 ------------------------------------ pandas/core/ops/array_ops.py | 44 ++++++++++++++++++++++++++++++++ pandas/core/ops/methods.py | 4 +-- 3 files changed, 45 insertions(+), 52 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 26b1ceef3a0a8..103df179e19ae 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,8 +5,6 @@ """ from __future__ import annotations -from pandas.core.dtypes.missing import isna - from pandas.core.ops.array_ops import ( arithmetic_op, comp_method_OBJECT_ARRAY, @@ -65,59 +63,12 @@ COMPARISON_BINOPS: set[str] = {"eq", "ne", "lt", "gt", "le", "ge"} -# ----------------------------------------------------------------------------- -# Masking NA values and fallbacks for operations numpy does not support - - -def fill_binop(left, right, fill_value): - """ - If a non-None fill_value is given, replace null entries in left and right - with this value, but only in positions where _one_ of left/right is null, - not both. - - Parameters - ---------- - left : array-like - right : array-like - fill_value : object - - Returns - ------- - left : array-like - right : array-like - - Notes - ----- - Makes copies if fill_value is not None and NAs are present. 
- """ - if fill_value is not None: - left_mask = isna(left) - right_mask = isna(right) - - # one but not both - mask = left_mask ^ right_mask - - if left_mask.any(): - # Avoid making a copy if we can - left = left.copy() - left[left_mask & mask] = fill_value - - if right_mask.any(): - # Avoid making a copy if we can - right = right.copy() - right[right_mask & mask] = fill_value - - return left, right - - __all__ = [ "ARITHMETIC_BINOPS", "arithmetic_op", "COMPARISON_BINOPS", "comparison_op", "comp_method_OBJECT_ARRAY", - "fill_binop", - "flex_method_SERIES", "invalid_comparison", "kleene_and", "kleene_or", diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index dfffe77fe1b76..c0ab72e9d796b 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -54,6 +54,50 @@ from pandas.core.ops.dispatch import should_extension_dispatch from pandas.core.ops.invalid import invalid_comparison +# ----------------------------------------------------------------------------- +# Masking NA values and fallbacks for operations numpy does not support + + +def fill_binop(left, right, fill_value): + """ + If a non-None fill_value is given, replace null entries in left and right + with this value, but only in positions where _one_ of left/right is null, + not both. + + Parameters + ---------- + left : array-like + right : array-like + fill_value : object + + Returns + ------- + left : array-like + right : array-like + + Notes + ----- + Makes copies if fill_value is not None and NAs are present. 
+ """ + if fill_value is not None: + left_mask = isna(left) + right_mask = isna(right) + + # one but not both + mask = left_mask ^ right_mask + + if left_mask.any(): + # Avoid making a copy if we can + left = left.copy() + left[left_mask & mask] = fill_value + + if right_mask.any(): + # Avoid making a copy if we can + right = right.copy() + right[right_mask & mask] = fill_value + + return left, right + def comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 11fb176eeb689..46ab8304fa6d9 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -36,6 +36,7 @@ from pandas.core.construction import extract_array from pandas.core.ops.array_ops import ( comparison_op, + fill_binop, get_array_op, logical_op, maybe_prepare_scalar_for_op, @@ -428,7 +429,6 @@ def _combine_frame(self, other: FrameOps, func, fill_value=None): _arith_op = func else: - from pandas.core.ops import fill_binop def _arith_op(left, right): # for the mixed_type case where we iterate over columns, @@ -730,8 +730,6 @@ def _binop(self, other: SeriesOps, func, level=None, fill_value=None): other, level=level, join="outer", copy=False ) - from pandas.core.ops import fill_binop - this_vals, other_vals = fill_binop(this._values, other._values, fill_value) with np.errstate(all="ignore"): From 8df6145d3f7a1f661d723af8db26899fd448e4f0 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Mar 2023 18:52:18 -0800 Subject: [PATCH 06/10] pyright ignore --- pandas/core/ops/methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 46ab8304fa6d9..82f5cff7b32a1 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -779,7 +779,7 @@ def _construct_result( # Set the result's name after __finalize__ is called because __finalize__ # would set it back to self.name - out.name = name + out.name = name # pyright: 
ignore[reportGeneralTypeIssues] return out def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0): From 886c53215448b919bbcbabc20518090faa061a95 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Mar 2023 21:03:46 -0800 Subject: [PATCH 07/10] fix ops.__init__.__all__ --- pandas/core/ops/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 103df179e19ae..11b4c65290c81 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -74,7 +74,6 @@ "kleene_or", "kleene_xor", "logical_op", - "maybe_dispatch_ufunc_to_dunder_op", "radd", "rand_", "rdiv", From 1949c5a29bfdef003a96075be1a228b3595d1d02 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Mar 2023 09:59:47 -0800 Subject: [PATCH 08/10] annotate --- pandas/_libs/properties.pyi | 16 +++++++- pandas/core/frame.py | 6 ++- pandas/core/ops/methods.py | 80 +++++++++++++++---------------------- pandas/core/series.py | 3 +- 4 files changed, 52 insertions(+), 53 deletions(-) diff --git a/pandas/_libs/properties.pyi b/pandas/_libs/properties.pyi index aaa44a0cf47bf..3a61c91506ecd 100644 --- a/pandas/_libs/properties.pyi +++ b/pandas/_libs/properties.pyi @@ -10,6 +10,14 @@ from pandas._typing import ( Series, ) +# These cannot _really_ be just FrameOps/SeriesOps, as those are +# mixins to DataFrame/Series. We include those here so that the annotations +# in the mixin are correct. +from pandas.core.ops.methods import ( + FrameOps, + SeriesOps, +) + # note: this is a lie to make type checkers happy (they special # case property). cache_readonly uses attribute names similar to # property (fget) but it does not provide fset and fdel. @@ -19,9 +27,13 @@ class AxisProperty: axis: int def __init__(self, axis: int = ..., doc: str = ...) -> None: ... @overload - def __get__(self, obj: DataFrame | Series, type) -> Index: ... + def __get__( + self, obj: DataFrame | Series | SeriesOps | FrameOps, type + ) -> Index: ... 
@overload def __get__(self, obj: None, type) -> AxisProperty: ... def __set__( - self, obj: DataFrame | Series, value: AnyArrayLike | Sequence + self, + obj: DataFrame | Series | SeriesOps | FrameOps, + value: AnyArrayLike | Sequence, ) -> None: ... diff --git a/pandas/core/frame.py b/pandas/core/frame.py index da69c39069f4e..5997734c4445f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -632,7 +632,8 @@ class DataFrame(NDFrame, FrameOps, OpsMixin): # type: ignore[misc] _mgr: BlockManager | ArrayManager @property - def _constructor(self) -> Callable[..., DataFrame]: + # error: Cannot override writeable attribute with read-only property + def _constructor(self) -> Callable[..., DataFrame]: # type: ignore[override] return DataFrame _constructor_sliced: Callable[..., Series] = Series @@ -906,7 +907,8 @@ def axes(self) -> list[Index]: return [self.index, self.columns] @property - def shape(self) -> tuple[int, int]: + # error: Cannot override writeable attribute with read-only property + def shape(self) -> tuple[int, int]: # type: ignore[override] """ Return a tuple representing the dimensionality of the DataFrame. 
diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 82f5cff7b32a1..1ae23405c8cd8 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -45,6 +45,8 @@ from pandas.core.ops.docstrings import make_flex_doc if TYPE_CHECKING: + from pandas._libs.properties import AxisProperty + from pandas import ( DataFrame, Series, @@ -56,8 +58,12 @@ class FrameOps: + _constructor: Callable[..., DataFrame] _get_axis_number: Callable[[Any], int] _mgr: BlockManager | ArrayManager + index: AxisProperty + columns: AxisProperty + shape: tuple[int, int] def _cmp_method(self, other, op): axis: Literal[1] = 1 # only relevant for Series other case @@ -75,8 +81,7 @@ def _arith_method(self, other, op): axis: Literal[1] = 1 # only relevant for Series other case other = maybe_prepare_scalar_for_op( other, - # error: "FrameOps" has no attribute "shape" - (self.shape[axis],), # type: ignore[attr-defined] + (self.shape[axis],), ) self, other = self._align_for_op(other, axis, flex=True, level=None) @@ -145,10 +150,8 @@ def _should_reindex_frame_op(self, right, op, axis: int, fill_value, level) -> b # TODO: any other cases we should handle here? 
# Intersection is always unique so we have to check the unique columns - # error: "FrameOps" has no attribute "columns" - left_uniques = self.columns.unique() # type: ignore[attr-defined] - # error: "FrameOps" has no attribute "columns" - right_uniques = right.columns.unique() # type: ignore[attr-defined] + left_uniques = self.columns.unique() + right_uniques = right.columns.unique() cols = left_uniques.intersection(right_uniques) if len(cols) and not ( len(cols) == len(left_uniques) and len(cols) == len(right_uniques) @@ -318,16 +321,12 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): else: rvalues = rvalues.reshape(1, -1) - # error: "FrameOps" has no attribute "shape" - rvalues = np.broadcast_to(rvalues, self.shape) # type: ignore[attr-defined] + rvalues = np.broadcast_to(rvalues, self.shape) # pass dtype to avoid doing inference - # error: "FrameOps" has no attribute "_constructor" - return self._constructor( # type: ignore[attr-defined] + return self._constructor( rvalues, - # error: "FrameOps" has no attribute "index" - index=self.index, # type: ignore[attr-defined] - # error: "FrameOps" has no attribute "columns" - columns=self.columns, # type: ignore[attr-defined] + index=self.index, + columns=self.columns, dtype=rvalues.dtype, ) @@ -354,14 +353,11 @@ def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None) # i.e. 
scalar, faster than checking np.ndim(right) == 0 with np.errstate(all="ignore"): bm = self._mgr.apply(array_op, right=right) - # error: "FrameOps" has no attribute "_constructor" - return self._constructor(bm) # type: ignore[attr-defined] + return self._constructor(bm) elif isinstance(right, FrameOps): - # error: "FrameOps" has no attribute "index" - assert self.index.equals(right.index) # type: ignore[attr-defined] - # error: "FrameOps" has no attribute "columns" - assert self.columns.equals(right.columns) # type: ignore[attr-defined] + assert self.index.equals(right.index) + assert self.columns.equals(right.columns) # TODO: The previous assertion `assert right._indexed_same(self)` # fails in cases with empty columns reached via # _arith_method_with_reindex @@ -378,13 +374,11 @@ def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None) right._mgr, # type: ignore[arg-type] array_op, ) - # error: "FrameOps" has no attribute "_constructor" - return self._constructor(bm) # type: ignore[attr-defined] + return self._constructor(bm) elif isinstance(right, SeriesOps) and axis == 1: # axis=1 means we want to operate row-by-row - # error: "FrameOps" has no attribute "columns" - assert right.index.equals(self.columns) # type: ignore[attr-defined] + assert right.index.equals(self.columns) right = right._values # maybe_align_as_frame ensures we do not have an ndarray here @@ -398,8 +392,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None) ] elif isinstance(right, SeriesOps): - # error: "FrameOps" has no attribute "index" - assert right.index.equals(self.index) # type: ignore[attr-defined] + assert right.index.equals(self.index) right = right._values # error: "FrameOps" has no attribute "_iter_column_arrays" @@ -413,10 +406,8 @@ def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None) # error: "Type[FrameOps]" has no attribute "_from_arrays" return type(self)._from_arrays( # type: 
ignore[attr-defined] arrays, - # error: "FrameOps" has no attribute "columns" - self.columns, # type: ignore[attr-defined] - # error: "FrameOps" has no attribute "index" - self.index, # type: ignore[attr-defined] + self.columns, + self.index, verify_integrity=False, ) @@ -452,15 +443,12 @@ def _construct_result(self, result) -> DataFrame: ------- DataFrame """ - # error: "FrameOps" has no attribute "_constructor" - out = self._constructor(result, copy=False) # type: ignore[attr-defined] + out = self._constructor(result, copy=False) out = out.__finalize__(self) # Pin columns instead of passing to constructor for compat with # non-unique columns case - # error: "FrameOps" has no attribute "columns" - out.columns = self.columns # type: ignore[attr-defined] - # error: "FrameOps" has no attribute "index" - out.index = self.index # type: ignore[attr-defined] + out.columns = self.columns + out.index = self.index return out def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: @@ -490,8 +478,7 @@ def _flex_arith_method( other = maybe_prepare_scalar_for_op( other, - # error: "FrameOps" has no attribute "shape" - self.shape, # type: ignore[attr-defined] + self.shape, ) self, other = self._align_for_op(other, axis, flex=True, level=level) @@ -638,8 +625,10 @@ def rpow(self, other, axis: Axis = "columns", level=None, fill_value=None): class SeriesOps: + _constructor: Callable[..., Series] _get_axis_number: Callable[[Any], int] _values: ArrayLike + index: AxisProperty def _cmp_method(self, other, op): res_name = get_op_result_name(self, other) @@ -683,8 +672,7 @@ def _align_for_op(self, right, align_asobject: bool = False): if isinstance(right, SeriesOps): # avoid repeated alignment - # error: "SeriesOps" has no attribute "index" - if not left.index.equals(right.index): # type: ignore[attr-defined] + if not left.index.equals(right.index): if align_asobject: # to keep original value's dtype for bool ops # error: "SeriesOps" has no attribute "astype" @@ -723,8 +711,7 @@ def 
_binop(self, other: SeriesOps, func, level=None, fill_value=None): this = self - # error: "SeriesOps" has no attribute "index" - if not self.index.equals(other.index): # type: ignore[attr-defined] + if not self.index.equals(other.index): # error: "SeriesOps" has no attribute "align" this, other = self.align( # type: ignore[attr-defined] other, level=level, join="outer", copy=False @@ -768,11 +755,9 @@ def _construct_result( # TODO: result should always be ArrayLike, but this fails for some # JSONArray tests dtype = getattr(result, "dtype", None) - # error: "SeriesOps" has no attribute "_constructor" - out = self._constructor( # type: ignore[attr-defined] + out = self._constructor( result, - # error: "SeriesOps" has no attribute "index" - index=self.index, # type: ignore[attr-defined] + index=self.index, dtype=dtype, ) out = out.__finalize__(self) @@ -795,8 +780,7 @@ def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0 # expected "Sized" if len(other) != len(self): # type: ignore[arg-type] raise ValueError("Lengths must be equal") - # error: "SeriesOps" has no attribute "index" - other = self._constructor(other, self.index) # type: ignore[attr-defined] + other = self._constructor(other, self.index) result = self._binop(other, op, level=level, fill_value=fill_value) result.name = res_name return result diff --git a/pandas/core/series.py b/pandas/core/series.py index a2eb978245462..3ee1f312be8f4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -552,7 +552,8 @@ def _init_dict( # ---------------------------------------------------------------------- @property - def _constructor(self) -> Callable[..., Series]: + # error: Cannot override writeable attribute with read-only property + def _constructor(self) -> Callable[..., Series]: # type: ignore[override] return Series @property From d75499ad8ed694c7ec92fae8b2f24621d0b23165 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Mar 2023 17:39:23 -0800 Subject: [PATCH 09/10] REF: 
move everything back to Series/DataFrame classes --- pandas/_libs/properties.pyi | 16 +- pandas/core/frame.py | 559 ++++++++++++++++++++- pandas/core/ops/__init__.py | 4 + pandas/core/ops/methods.py | 933 ------------------------------------ pandas/core/series.py | 299 +++++++++++- 5 files changed, 852 insertions(+), 959 deletions(-) delete mode 100644 pandas/core/ops/methods.py diff --git a/pandas/_libs/properties.pyi b/pandas/_libs/properties.pyi index 3a61c91506ecd..aaa44a0cf47bf 100644 --- a/pandas/_libs/properties.pyi +++ b/pandas/_libs/properties.pyi @@ -10,14 +10,6 @@ from pandas._typing import ( Series, ) -# These cannot _really_ be just FrameOps/SeriesOps, as those are -# mixins to DataFrame/Series. We include those here so that the annotations -# in the mixin are correct. -from pandas.core.ops.methods import ( - FrameOps, - SeriesOps, -) - # note: this is a lie to make type checkers happy (they special # case property). cache_readonly uses attribute names similar to # property (fget) but it does not provide fset and fdel. @@ -27,13 +19,9 @@ class AxisProperty: axis: int def __init__(self, axis: int = ..., doc: str = ...) -> None: ... @overload - def __get__( - self, obj: DataFrame | Series | SeriesOps | FrameOps, type - ) -> Index: ... + def __get__(self, obj: DataFrame | Series, type) -> Index: ... @overload def __get__(self, obj: None, type) -> AxisProperty: ... def __set__( - self, - obj: DataFrame | Series | SeriesOps | FrameOps, - value: AnyArrayLike | Sequence, + self, obj: DataFrame | Series, value: AnyArrayLike | Sequence ) -> None: ... 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 529de5e3053d3..7db8c48b467a6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -15,6 +15,7 @@ import functools from io import StringIO import itertools +import operator import sys from textwrap import dedent from typing import ( @@ -88,6 +89,7 @@ from pandas.core.dtypes.common import ( infer_dtype_from_object, is_1d_only_ea_dtype, + is_array_like, is_bool_dtype, is_dataclass, is_dict_like, @@ -115,6 +117,8 @@ algorithms, common as com, nanops, + ops, + roperator, ) from pandas.core.accessor import CachedAccessor from pandas.core.apply import ( @@ -171,7 +175,6 @@ treat_as_nested, ) from pandas.core.methods import selectn -from pandas.core.ops.methods import FrameOps from pandas.core.reshape.melt import melt from pandas.core.series import Series from pandas.core.shared_docs import _shared_docs @@ -488,8 +491,7 @@ # DataFrame class -# Cannot override writable attribute "_get_axis_number" with a final one -class DataFrame(NDFrame, FrameOps, OpsMixin): # type: ignore[misc] +class DataFrame(NDFrame, OpsMixin): """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. @@ -632,8 +634,7 @@ class DataFrame(NDFrame, FrameOps, OpsMixin): # type: ignore[misc] _mgr: BlockManager | ArrayManager @property - # error: Cannot override writeable attribute with read-only property - def _constructor(self) -> Callable[..., DataFrame]: # type: ignore[override] + def _constructor(self) -> Callable[..., DataFrame]: return DataFrame _constructor_sliced: Callable[..., Series] = Series @@ -907,8 +908,7 @@ def axes(self) -> list[Index]: return [self.index, self.columns] @property - # error: Cannot override writeable attribute with read-only property - def shape(self) -> tuple[int, int]: # type: ignore[override] + def shape(self) -> tuple[int, int]: """ Return a tuple representing the dimensionality of the DataFrame. 
@@ -7456,6 +7456,551 @@ class diet result.columns = result.columns.reorder_levels(order) return result + # ---------------------------------------------------------------------- + # Arithmetic Methods + + def _cmp_method(self, other, op): + axis: Literal[1] = 1 # only relevant for Series other case + + self, other = self._align_for_op(other, axis, flex=False, level=None) + + # See GH#4537 for discussion of scalar op behavior + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + def _arith_method(self, other, op): + if self._should_reindex_frame_op(other, op, 1, None, None): + return self._arith_method_with_reindex(other, op) + + axis: Literal[1] = 1 # only relevant for Series other case + other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],)) + + self, other = self._align_for_op(other, axis, flex=True, level=None) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + _logical_method = _arith_method + + def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None): + """ + Evaluate the frame operation func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. + + Parameters + ---------- + right : scalar, Series, or DataFrame + func : arithmetic or comparison operator + axis : {None, 0, 1} + + Returns + ------- + DataFrame + """ + # Get the appropriate array-op to apply to each column/block's values. + array_op = ops.get_array_op(func) + + right = lib.item_from_zerodim(right) + if not is_list_like(right): + # i.e. 
scalar, faster than checking np.ndim(right) == 0 + with np.errstate(all="ignore"): + bm = self._mgr.apply(array_op, right=right) + return self._constructor(bm) + + elif isinstance(right, DataFrame): + assert self.index.equals(right.index) + assert self.columns.equals(right.columns) + # TODO: The previous assertion `assert right._indexed_same(self)` + # fails in cases with empty columns reached via + # _frame_arith_method_with_reindex + + # TODO operate_blockwise expects a manager of the same type + with np.errstate(all="ignore"): + bm = self._mgr.operate_blockwise( + # error: Argument 1 to "operate_blockwise" of "ArrayManager" has + # incompatible type "Union[ArrayManager, BlockManager]"; expected + # "ArrayManager" + # error: Argument 1 to "operate_blockwise" of "BlockManager" has + # incompatible type "Union[ArrayManager, BlockManager]"; expected + # "BlockManager" + right._mgr, # type: ignore[arg-type] + array_op, + ) + return self._constructor(bm) + + elif isinstance(right, Series) and axis == 1: + # axis=1 means we want to operate row-by-row + assert right.index.equals(self.columns) + + right = right._values + # maybe_align_as_frame ensures we do not have an ndarray here + assert not isinstance(right, np.ndarray) + + with np.errstate(all="ignore"): + arrays = [ + array_op(_left, _right) + for _left, _right in zip(self._iter_column_arrays(), right) + ] + + elif isinstance(right, Series): + assert right.index.equals(self.index) + right = right._values + + with np.errstate(all="ignore"): + arrays = [array_op(left, right) for left in self._iter_column_arrays()] + + else: + raise NotImplementedError(right) + + return type(self)._from_arrays( + arrays, self.columns, self.index, verify_integrity=False + ) + + def _combine_frame(self, other: DataFrame, func, fill_value=None): + # at this point we have `self._indexed_same(other)` + + if fill_value is None: + # since _arith_op may be called in a loop, avoid function call + # overhead if possible by doing this check once 
+ _arith_op = func + + else: + + def _arith_op(left, right): + # for the mixed_type case where we iterate over columns, + # _arith_op(left, right) is equivalent to + # left._binop(right, func, fill_value=fill_value) + left, right = ops.fill_binop(left, right, fill_value) + return func(left, right) + + new_data = self._dispatch_frame_op(other, _arith_op) + return new_data + + def _arith_method_with_reindex(self, right: DataFrame, op) -> DataFrame: + """ + For DataFrame-with-DataFrame operations that require reindexing, + operate only on shared columns, then reindex. + + Parameters + ---------- + right : DataFrame + op : binary operator + + Returns + ------- + DataFrame + """ + left = self + + # GH#31623, only operate on shared columns + cols, lcols, rcols = left.columns.join( + right.columns, how="inner", level=None, return_indexers=True + ) + + new_left = left.iloc[:, lcols] + new_right = right.iloc[:, rcols] + result = op(new_left, new_right) + + # Do the join on the columns instead of using left._align_for_op + # to avoid constructing two potentially large/sparse DataFrames + join_columns, _, _ = left.columns.join( + right.columns, how="outer", level=None, return_indexers=True + ) + + if result.columns.has_duplicates: + # Avoid reindexing with a duplicate axis. + # https://github.com/pandas-dev/pandas/issues/35194 + indexer, _ = result.columns.get_indexer_non_unique(join_columns) + indexer = algorithms.unique1d(indexer) + result = result._reindex_with_indexers( + {1: [join_columns, indexer]}, allow_dups=True + ) + else: + result = result.reindex(join_columns, axis=1) + + return result + + def _should_reindex_frame_op(self, right, op, axis: int, fill_value, level) -> bool: + """ + Check if this is an operation between DataFrames that will need to reindex. 
+ """ + if op is operator.pow or op is roperator.rpow: + # GH#32685 pow has special semantics for operating with null values + return False + + if not isinstance(right, DataFrame): + return False + + if fill_value is None and level is None and axis == 1: + # TODO: any other cases we should handle here? + + # Intersection is always unique so we have to check the unique columns + left_uniques = self.columns.unique() + right_uniques = right.columns.unique() + cols = left_uniques.intersection(right_uniques) + if len(cols) and not ( + len(cols) == len(left_uniques) and len(cols) == len(right_uniques) + ): + # TODO: is there a shortcut available when len(cols) == 0? + return True + + return False + + def _align_for_op( + self, other, axis, flex: bool | None = False, level: Level = None + ): + """ + Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. + + Parameters + ---------- + left : DataFrame + right : Any + axis : int, str, or None + flex : bool or None, default False + Whether this is a flex op, in which case we reindex. + None indicates not to check for alignment. + level : int or level name, default None + + Returns + ------- + left : DataFrame + right : Any + """ + left, right = self, other + + def to_series(right): + msg = ( + "Unable to coerce to Series, " + "length must be {req_len}: given {given_len}" + ) + + # pass dtype to avoid doing inference, which would break consistency + # with Index/Series ops + dtype = None + if getattr(right, "dtype", None) == object: + # can't pass right.dtype unconditionally as that would break on e.g. 
+ # datetime64[h] ndarray + dtype = object + + if axis is not None and left._get_axis_number(axis) == 0: + if len(left.index) != len(right): + raise ValueError( + msg.format(req_len=len(left.index), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.index, dtype=dtype) + else: + if len(left.columns) != len(right): + raise ValueError( + msg.format(req_len=len(left.columns), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.columns, dtype=dtype) + return right + + if isinstance(right, np.ndarray): + if right.ndim == 1: + right = to_series(right) + + elif right.ndim == 2: + # We need to pass dtype=right.dtype to retain object dtype + # otherwise we lose consistency with Index and array ops + dtype = None + if right.dtype == object: + # can't pass right.dtype unconditionally as that would break on e.g. + # datetime64[h] ndarray + dtype = object + + if right.shape == left.shape: + right = left._constructor( + right, index=left.index, columns=left.columns, dtype=dtype + ) + + elif right.shape[0] == left.shape[0] and right.shape[1] == 1: + # Broadcast across columns + right = np.broadcast_to(right, left.shape) + right = left._constructor( + right, index=left.index, columns=left.columns, dtype=dtype + ) + + elif right.shape[1] == left.shape[1] and right.shape[0] == 1: + # Broadcast along rows + right = to_series(right[0, :]) + + else: + raise ValueError( + "Unable to coerce to DataFrame, shape " + f"must be {left.shape}: given {right.shape}" + ) + + elif right.ndim > 2: + raise ValueError( + "Unable to coerce to Series/DataFrame, " + f"dimension must be <= 2: {right.shape}" + ) + + elif is_list_like(right) and not isinstance(right, (Series, DataFrame)): + # GH#36702. Raise when attempting arithmetic with list of array-like. 
+ if any(is_array_like(el) for el in right): + raise ValueError( + f"Unable to coerce list of {type(right[0])} to Series/DataFrame" + ) + # GH#17901 + right = to_series(right) + + if flex is not None and isinstance(right, DataFrame): + if not left._indexed_same(right): + if flex: + left, right = left.align( + right, join="outer", level=level, copy=False + ) + else: + raise ValueError( + "Can only compare identically-labeled (both index and columns) " + "DataFrame objects" + ) + elif isinstance(right, Series): + # axis=1 is default for DataFrame-with-Series op + axis = left._get_axis_number(axis) if axis is not None else 1 + + if not flex: + if not left.axes[axis].equals(right.index): + raise ValueError( + "Operands are not aligned. Do " + "`left, right = left.align(right, axis=1, copy=False)` " + "before operating." + ) + + left, right = left.align( + # error: Argument 1 to "align" of "DataFrame" has incompatible + # type "Series"; expected "DataFrame" + right, # type: ignore[arg-type] + join="outer", + axis=axis, + level=level, + copy=False, + ) + right = left._maybe_align_series_as_frame(right, axis) + + return left, right + + def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): + """ + If the Series operand is not EA-dtype, we can broadcast to 2D and operate + blockwise. 
+ """ + rvalues = series._values + if not isinstance(rvalues, np.ndarray): + # TODO(EA2D): no need to special-case with 2D EAs + if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): + # We can losslessly+cheaply cast to ndarray + rvalues = np.asarray(rvalues) + else: + return series + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) + else: + rvalues = rvalues.reshape(1, -1) + + rvalues = np.broadcast_to(rvalues, self.shape) + # pass dtype to avoid doing inference + return self._constructor( + rvalues, + index=self.index, + columns=self.columns, + dtype=rvalues.dtype, + ) + + def _flex_arith_method( + self, other, op, *, axis: Axis = "columns", level=None, fill_value=None + ): + axis = self._get_axis_number(axis) if axis is not None else 1 + + if self._should_reindex_frame_op(other, op, axis, fill_value, level): + return self._arith_method_with_reindex(other, op) + + if isinstance(other, Series) and fill_value is not None: + # TODO: We could allow this in cases where we end up going + # through the DataFrame path + raise NotImplementedError(f"fill_value {fill_value} not supported.") + + other = ops.maybe_prepare_scalar_for_op( + other, + self.shape, + ) + self, other = self._align_for_op(other, axis, flex=True, level=level) + + if isinstance(other, DataFrame): + # Another DataFrame + new_data = self._combine_frame(other, op, fill_value) + + elif isinstance(other, Series): + new_data = self._dispatch_frame_op(other, op, axis=axis) + else: + # in this case we always have `np.ndim(other) == 0` + if fill_value is not None: + self = self.fillna(fill_value) + + new_data = self._dispatch_frame_op(other, op) + + return self._construct_result(new_data) + + def _construct_result(self, result) -> DataFrame: + """ + Wrap the result of an arithmetic, comparison, or logical operation. 
+ + Parameters + ---------- + result : DataFrame + + Returns + ------- + DataFrame + """ + out = self._constructor(result, copy=False).__finalize__(self) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + out.columns = self.columns + out.index = self.index + return out + + def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: + # Naive implementation, room for optimization + div = self // other + mod = self - div * other + return div, mod + + def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: + # Naive implementation, room for optimization + div = other // self + mod = other - div * self + return div, mod + + def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): + axis = self._get_axis_number(axis) if axis is not None else 1 + + self, other = self._align_for_op(other, axis, flex=True, level=level) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + @Appender(ops.make_flex_doc("eq", "dataframe")) + def eq(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) + + @Appender(ops.make_flex_doc("ne", "dataframe")) + def ne(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.ne, axis=axis, level=level) + + @Appender(ops.make_flex_doc("le", "dataframe")) + def le(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.le, axis=axis, level=level) + + @Appender(ops.make_flex_doc("lt", "dataframe")) + def lt(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.lt, axis=axis, level=level) + + @Appender(ops.make_flex_doc("ge", "dataframe")) + def ge(self, other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.ge, axis=axis, level=level) + + @Appender(ops.make_flex_doc("gt", "dataframe")) + def gt(self, 
other, axis: Axis = "columns", level=None): + return self._flex_cmp_method(other, operator.gt, axis=axis, level=level) + + @Appender(ops.make_flex_doc("add", "dataframe")) + def add(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.add, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("radd", "dataframe")) + def radd(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.radd, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("sub", "dataframe")) + def sub(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.sub, level=level, fill_value=fill_value, axis=axis + ) + + subtract = sub + + @Appender(ops.make_flex_doc("rsub", "dataframe")) + def rsub(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rsub, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("mul", "dataframe")) + def mul(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.mul, level=level, fill_value=fill_value, axis=axis + ) + + multiply = mul + + @Appender(ops.make_flex_doc("rmul", "dataframe")) + def rmul(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rmul, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("truediv", "dataframe")) + def truediv(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.truediv, level=level, fill_value=fill_value, axis=axis + ) + + div = truediv + divide = truediv + + @Appender(ops.make_flex_doc("rtruediv", "dataframe")) + def rtruediv(self, other, axis: Axis = "columns", level=None, 
fill_value=None): + return self._flex_arith_method( + other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis + ) + + rdiv = rtruediv + + @Appender(ops.make_flex_doc("floordiv", "dataframe")) + def floordiv(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.floordiv, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("rfloordiv", "dataframe")) + def rfloordiv(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("mod", "dataframe")) + def mod(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.mod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("rmod", "dataframe")) + def rmod(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rmod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("pow", "dataframe")) + def pow(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, operator.pow, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("rpow", "dataframe")) + def rpow(self, other, axis: Axis = "columns", level=None, fill_value=None): + return self._flex_arith_method( + other, roperator.rpow, level=level, fill_value=fill_value, axis=axis + ) + # ---------------------------------------------------------------------- # Combination-Related diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 11b4c65290c81..b2ea8102e2747 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -9,6 +9,7 @@ arithmetic_op, comp_method_OBJECT_ARRAY, comparison_op, + fill_binop, get_array_op, 
logical_op, maybe_prepare_scalar_for_op, @@ -17,6 +18,7 @@ get_op_result_name, unpack_zerodim_and_defer, ) +from pandas.core.ops.docstrings import make_flex_doc from pandas.core.ops.invalid import invalid_comparison from pandas.core.ops.mask_ops import ( kleene_and, @@ -70,10 +72,12 @@ "comparison_op", "comp_method_OBJECT_ARRAY", "invalid_comparison", + "fill_binop", "kleene_and", "kleene_or", "kleene_xor", "logical_op", + "make_flex_doc", "radd", "rand_", "rdiv", diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py deleted file mode 100644 index 1ae23405c8cd8..0000000000000 --- a/pandas/core/ops/methods.py +++ /dev/null @@ -1,933 +0,0 @@ -""" -Functions to generate methods and pin them to the appropriate classes. -""" -from __future__ import annotations - -import operator -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Hashable, - Literal, - cast, -) - -import numpy as np - -from pandas._libs import lib -from pandas._typing import ( - ArrayLike, - Axis, - AxisInt, - Level, -) -from pandas.util._decorators import Appender - -from pandas.core.dtypes.common import ( - is_array_like, - is_list_like, -) - -from pandas.core import ( - algorithms, - roperator, -) -from pandas.core.construction import extract_array -from pandas.core.ops.array_ops import ( - comparison_op, - fill_binop, - get_array_op, - logical_op, - maybe_prepare_scalar_for_op, -) -from pandas.core.ops.common import get_op_result_name -from pandas.core.ops.docstrings import make_flex_doc - -if TYPE_CHECKING: - from pandas._libs.properties import AxisProperty - - from pandas import ( - DataFrame, - Series, - ) - from pandas.core.internals import ( - ArrayManager, - BlockManager, - ) - - -class FrameOps: - _constructor: Callable[..., DataFrame] - _get_axis_number: Callable[[Any], int] - _mgr: BlockManager | ArrayManager - index: AxisProperty - columns: AxisProperty - shape: tuple[int, int] - - def _cmp_method(self, other, op): - axis: Literal[1] = 1 # only relevant for Series 
other case - - self, other = self._align_for_op(other, axis, flex=False, level=None) - - # See GH#4537 for discussion of scalar op behavior - new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) - - def _arith_method(self, other, op): - if self._should_reindex_frame_op(other, op, 1, None, None): - return self._arith_method_with_reindex(other, op) - - axis: Literal[1] = 1 # only relevant for Series other case - other = maybe_prepare_scalar_for_op( - other, - (self.shape[axis],), - ) - - self, other = self._align_for_op(other, axis, flex=True, level=None) - - new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) - - _logical_method = _arith_method - - def _arith_method_with_reindex(self, right: DataFrame, op) -> DataFrame: - """ - For DataFrame-with-DataFrame operations that require reindexing, - operate only on shared columns, then reindex. - - Parameters - ---------- - right : DataFrame - op : binary operator - - Returns - ------- - DataFrame - """ - left = cast("DataFrame", self) - - # GH#31623, only operate on shared columns - cols, lcols, rcols = left.columns.join( - right.columns, how="inner", level=None, return_indexers=True - ) - - new_left = left.iloc[:, lcols] - new_right = right.iloc[:, rcols] - result = op(new_left, new_right) - - # Do the join on the columns instead of using left._align_for_op - # to avoid constructing two potentially large/sparse DataFrames - join_columns, _, _ = left.columns.join( - right.columns, how="outer", level=None, return_indexers=True - ) - - if result.columns.has_duplicates: - # Avoid reindexing with a duplicate axis. 
- # https://github.com/pandas-dev/pandas/issues/35194 - indexer, _ = result.columns.get_indexer_non_unique(join_columns) - indexer = algorithms.unique1d(indexer) - result = result._reindex_with_indexers( - {1: [join_columns, indexer]}, allow_dups=True - ) - else: - result = result.reindex(join_columns, axis=1) - - return result - - def _should_reindex_frame_op(self, right, op, axis: int, fill_value, level) -> bool: - """ - Check if this is an operation between DataFrames that will need to reindex. - """ - if op is operator.pow or op is roperator.rpow: - # GH#32685 pow has special semantics for operating with null values - return False - - if not isinstance(right, FrameOps): - return False - - if fill_value is None and level is None and axis == 1: - # TODO: any other cases we should handle here? - - # Intersection is always unique so we have to check the unique columns - left_uniques = self.columns.unique() - right_uniques = right.columns.unique() - cols = left_uniques.intersection(right_uniques) - if len(cols) and not ( - len(cols) == len(left_uniques) and len(cols) == len(right_uniques) - ): - # TODO: is there a shortcut available when len(cols) == 0? - return True - - return False - - def _align_for_op( - self, other, axis, flex: bool | None = False, level: Level = None - ): - """ - Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. - - Parameters - ---------- - left : DataFrame - right : Any - axis : int, str, or None - flex : bool or None, default False - Whether this is a flex op, in which case we reindex. - None indicates not to check for alignment. 
- level : int or level name, default None - - Returns - ------- - left : DataFrame - right : Any - """ - self = cast("DataFrame", self) - left, right = self, other - - def to_series(right): - msg = ( - "Unable to coerce to Series, " - "length must be {req_len}: given {given_len}" - ) - - # pass dtype to avoid doing inference, which would break consistency - # with Index/Series ops - dtype = None - if getattr(right, "dtype", None) == object: - # can't pass right.dtype unconditionally as that would break on e.g. - # datetime64[h] ndarray - dtype = object - - if axis is not None and left._get_axis_number(axis) == 0: - if len(left.index) != len(right): - raise ValueError( - msg.format(req_len=len(left.index), given_len=len(right)) - ) - right = left._constructor_sliced(right, index=left.index, dtype=dtype) - else: - if len(left.columns) != len(right): - raise ValueError( - msg.format(req_len=len(left.columns), given_len=len(right)) - ) - right = left._constructor_sliced(right, index=left.columns, dtype=dtype) - return right - - if isinstance(right, np.ndarray): - if right.ndim == 1: - right = to_series(right) - - elif right.ndim == 2: - # We need to pass dtype=right.dtype to retain object dtype - # otherwise we lose consistency with Index and array ops - dtype = None - if right.dtype == object: - # can't pass right.dtype unconditionally as that would break on e.g. 
- # datetime64[h] ndarray - dtype = object - - if right.shape == left.shape: - right = left._constructor( - right, index=left.index, columns=left.columns, dtype=dtype - ) - - elif right.shape[0] == left.shape[0] and right.shape[1] == 1: - # Broadcast across columns - right = np.broadcast_to(right, left.shape) - right = left._constructor( - right, index=left.index, columns=left.columns, dtype=dtype - ) - - elif right.shape[1] == left.shape[1] and right.shape[0] == 1: - # Broadcast along rows - right = to_series(right[0, :]) - - else: - raise ValueError( - "Unable to coerce to DataFrame, shape " - f"must be {left.shape}: given {right.shape}" - ) - - elif right.ndim > 2: - raise ValueError( - "Unable to coerce to Series/DataFrame, " - f"dimension must be <= 2: {right.shape}" - ) - - elif is_list_like(right) and not isinstance(right, (SeriesOps, FrameOps)): - # GH#36702. Raise when attempting arithmetic with list of array-like. - if any(is_array_like(el) for el in right): - raise ValueError( - f"Unable to coerce list of {type(right[0])} to Series/DataFrame" - ) - # GH#17901 - right = to_series(right) - - if flex is not None and isinstance(right, FrameOps): - rframe = cast("DataFrame", right) - if not left._indexed_same(rframe): - if flex: - left, right = left.align( - rframe, join="outer", level=level, copy=False - ) - else: - raise ValueError( - "Can only compare identically-labeled (both index and columns) " - "DataFrame objects" - ) - elif isinstance(right, SeriesOps): - right = cast("Series", right) - - # axis=1 is default for DataFrame-with-Series op - axis = left._get_axis_number(axis) if axis is not None else 1 - - if not flex: - if not left.axes[axis].equals(right.index): - raise ValueError( - "Operands are not aligned. Do " - "`left, right = left.align(right, axis=1, copy=False)` " - "before operating." 
- ) - - left, right = left.align( - # error: Argument 1 to "align" of "DataFrame" has incompatible - # type "Series"; expected "DataFrame" - right, # type: ignore[arg-type] - join="outer", - axis=axis, - level=level, - copy=False, - ) - right = left._maybe_align_series_as_frame(right, axis) - - return left, right - - def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): - """ - If the Series operand is not EA-dtype, we can broadcast to 2D and operate - blockwise. - """ - rvalues = series._values - if not isinstance(rvalues, np.ndarray): - # TODO(EA2D): no need to special-case with 2D EAs - if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): - # We can losslessly+cheaply cast to ndarray - rvalues = np.asarray(rvalues) - else: - return series - - if axis == 0: - rvalues = rvalues.reshape(-1, 1) - else: - rvalues = rvalues.reshape(1, -1) - - rvalues = np.broadcast_to(rvalues, self.shape) - # pass dtype to avoid doing inference - return self._constructor( - rvalues, - index=self.index, - columns=self.columns, - dtype=rvalues.dtype, - ) - - def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None): - """ - Evaluate the frame operation func(left, right) by evaluating - column-by-column, dispatching to the Series implementation. - - Parameters - ---------- - right : scalar, Series, or DataFrame - func : arithmetic or comparison operator - axis : {None, 0, 1} - - Returns - ------- - DataFrame - """ - # Get the appropriate array-op to apply to each column/block's values. - array_op = get_array_op(func) - - right = lib.item_from_zerodim(right) - if not is_list_like(right): - # i.e. 
scalar, faster than checking np.ndim(right) == 0 - with np.errstate(all="ignore"): - bm = self._mgr.apply(array_op, right=right) - return self._constructor(bm) - - elif isinstance(right, FrameOps): - assert self.index.equals(right.index) - assert self.columns.equals(right.columns) - # TODO: The previous assertion `assert right._indexed_same(self)` - # fails in cases with empty columns reached via - # _arith_method_with_reindex - - # TODO operate_blockwise expects a manager of the same type - with np.errstate(all="ignore"): - bm = self._mgr.operate_blockwise( - # error: Argument 1 to "operate_blockwise" of "ArrayManager" has - # incompatible type "Union[ArrayManager, BlockManager]"; expected - # "ArrayManager" - # error: Argument 1 to "operate_blockwise" of "BlockManager" has - # incompatible type "Union[ArrayManager, BlockManager]"; expected - # "BlockManager" - right._mgr, # type: ignore[arg-type] - array_op, - ) - return self._constructor(bm) - - elif isinstance(right, SeriesOps) and axis == 1: - # axis=1 means we want to operate row-by-row - assert right.index.equals(self.columns) - - right = right._values - # maybe_align_as_frame ensures we do not have an ndarray here - assert not isinstance(right, np.ndarray) - - # error: "FrameOps" has no attribute "_iter_column_arrays" - col_arrays = self._iter_column_arrays() # type: ignore[attr-defined] - with np.errstate(all="ignore"): - arrays = [ - array_op(_left, _right) for _left, _right in zip(col_arrays, right) - ] - - elif isinstance(right, SeriesOps): - assert right.index.equals(self.index) - right = right._values - - # error: "FrameOps" has no attribute "_iter_column_arrays" - col_arrays = self._iter_column_arrays() # type: ignore[attr-defined] - with np.errstate(all="ignore"): - arrays = [array_op(left, right) for left in col_arrays] - - else: - raise NotImplementedError(right) - - # error: "Type[FrameOps]" has no attribute "_from_arrays" - return type(self)._from_arrays( # type: ignore[attr-defined] - arrays, - 
self.columns, - self.index, - verify_integrity=False, - ) - - def _combine_frame(self, other: FrameOps, func, fill_value=None): - # at this point we have `self._indexed_same(other)` - - if fill_value is None: - # since _arith_op may be called in a loop, avoid function call - # overhead if possible by doing this check once - _arith_op = func - - else: - - def _arith_op(left, right): - # for the mixed_type case where we iterate over columns, - # _arith_op(left, right) is equivalent to - # left._binop(right, func, fill_value=fill_value) - left, right = fill_binop(left, right, fill_value) - return func(left, right) - - new_data = self._dispatch_frame_op(other, _arith_op) - return new_data - - def _construct_result(self, result) -> DataFrame: - """ - Wrap the result of an arithmetic, comparison, or logical operation. - - Parameters - ---------- - result : DataFrame - - Returns - ------- - DataFrame - """ - out = self._constructor(result, copy=False) - out = out.__finalize__(self) - # Pin columns instead of passing to constructor for compat with - # non-unique columns case - out.columns = self.columns - out.index = self.index - return out - - def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: - # Naive implementation, room for optimization - div = self // other - mod = self - div * other - return div, mod - - def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: - # Naive implementation, room for optimization - div = other // self - mod = other - div * self - return div, mod - - def _flex_arith_method( - self, other, op, *, axis: Axis = "columns", level=None, fill_value=None - ): - axis = self._get_axis_number(axis) if axis is not None else 1 - - if self._should_reindex_frame_op(other, op, axis, fill_value, level): - return self._arith_method_with_reindex(other, op) - - if isinstance(other, SeriesOps) and fill_value is not None: - # TODO: We could allow this in cases where we end up going - # through the DataFrame path - raise 
NotImplementedError(f"fill_value {fill_value} not supported.") - - other = maybe_prepare_scalar_for_op( - other, - self.shape, - ) - self, other = self._align_for_op(other, axis, flex=True, level=level) - - if isinstance(other, FrameOps): - # Another DataFrame - new_data = self._combine_frame(other, op, fill_value) - - elif isinstance(other, SeriesOps): - new_data = self._dispatch_frame_op(other, op, axis=axis) - else: - # in this case we always have `np.ndim(other) == 0` - if fill_value is not None: - # error: "FrameOps" has no attribute "fillna" - self = self.fillna(fill_value) # type: ignore[attr-defined] - - new_data = self._dispatch_frame_op(other, op) - - return self._construct_result(new_data) - - def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): - axis = self._get_axis_number(axis) if axis is not None else 1 - - self, other = self._align_for_op(other, axis, flex=True, level=level) - - new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) - - @Appender(make_flex_doc("eq", "dataframe")) - def eq(self, other, axis: Axis = "columns", level=None): - return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) - - @Appender(make_flex_doc("ne", "dataframe")) - def ne(self, other, axis: Axis = "columns", level=None): - return self._flex_cmp_method(other, operator.ne, axis=axis, level=level) - - @Appender(make_flex_doc("le", "dataframe")) - def le(self, other, axis: Axis = "columns", level=None): - return self._flex_cmp_method(other, operator.le, axis=axis, level=level) - - @Appender(make_flex_doc("lt", "dataframe")) - def lt(self, other, axis: Axis = "columns", level=None): - return self._flex_cmp_method(other, operator.lt, axis=axis, level=level) - - @Appender(make_flex_doc("ge", "dataframe")) - def ge(self, other, axis: Axis = "columns", level=None): - return self._flex_cmp_method(other, operator.ge, axis=axis, level=level) - - @Appender(make_flex_doc("gt", "dataframe")) - def 
gt(self, other, axis: Axis = "columns", level=None): - return self._flex_cmp_method(other, operator.gt, axis=axis, level=level) - - @Appender(make_flex_doc("add", "dataframe")) - def add(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, operator.add, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("radd", "dataframe")) - def radd(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, roperator.radd, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("sub", "dataframe")) - def sub(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, operator.sub, level=level, fill_value=fill_value, axis=axis - ) - - subtract = sub - - @Appender(make_flex_doc("rsub", "dataframe")) - def rsub(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, roperator.rsub, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("mul", "dataframe")) - def mul(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, operator.mul, level=level, fill_value=fill_value, axis=axis - ) - - multiply = mul - - @Appender(make_flex_doc("rmul", "dataframe")) - def rmul(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, roperator.rmul, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("truediv", "dataframe")) - def truediv(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, operator.truediv, level=level, fill_value=fill_value, axis=axis - ) - - div = truediv - divide = truediv - - @Appender(make_flex_doc("rtruediv", "dataframe")) - def rtruediv(self, other, axis: Axis = "columns", level=None, fill_value=None): - 
return self._flex_arith_method( - other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis - ) - - rdiv = rtruediv - - @Appender(make_flex_doc("floordiv", "dataframe")) - def floordiv(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, operator.floordiv, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("rfloordiv", "dataframe")) - def rfloordiv(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("mod", "dataframe")) - def mod(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, operator.mod, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("rmod", "dataframe")) - def rmod(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, roperator.rmod, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("pow", "dataframe")) - def pow(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, operator.pow, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("rpow", "dataframe")) - def rpow(self, other, axis: Axis = "columns", level=None, fill_value=None): - return self._flex_arith_method( - other, roperator.rpow, level=level, fill_value=fill_value, axis=axis - ) - - -class SeriesOps: - _constructor: Callable[..., Series] - _get_axis_number: Callable[[Any], int] - _values: ArrayLike - index: AxisProperty - - def _cmp_method(self, other, op): - res_name = get_op_result_name(self, other) - - if isinstance(other, SeriesOps): - # error: "SeriesOps" has no attribute "_indexed_same" - if not self._indexed_same(other): # type: ignore[attr-defined] - raise ValueError("Can only compare 
identically-labeled Series objects") - - lvalues = self._values - rvalues = extract_array(other, extract_numpy=True, extract_range=True) - - with np.errstate(all="ignore"): - res_values = comparison_op(lvalues, rvalues, op) - - return self._construct_result(res_values, name=res_name) - - def _logical_method(self, other, op): - res_name = get_op_result_name(self, other) - self, other = self._align_for_op(other, align_asobject=True) - - lvalues = self._values - rvalues = extract_array(other, extract_numpy=True, extract_range=True) - - res_values = logical_op(lvalues, rvalues, op) - return self._construct_result(res_values, name=res_name) - - def _arith_method(self, other, op): - self, other = self._align_for_op(other) - - # use IndexOpsMixin._arith_method - # error: "_arith_method" undefined in superclass - return super()._arith_method(other, op) # type: ignore[misc] - - def _align_for_op(self, right, align_asobject: bool = False): - """align lhs and rhs Series""" - # TODO: Different from DataFrame._align_for_op, list, tuple and ndarray - # are not coerced here - # because Series has inconsistencies described in GH#13637 - left = self - - if isinstance(right, SeriesOps): - # avoid repeated alignment - if not left.index.equals(right.index): - if align_asobject: - # to keep original value's dtype for bool ops - # error: "SeriesOps" has no attribute "astype" - left = left.astype(object) # type: ignore[attr-defined] - # error: "SeriesOps" has no attribute "astype" - right = right.astype(object) # type: ignore[attr-defined] - - # error: "SeriesOps" has no attribute "align" - left, right = left.align( # type: ignore[attr-defined] - right, copy=False - ) - - return left, right - - def _binop(self, other: SeriesOps, func, level=None, fill_value=None): - """ - Perform generic binary operation with optional fill value. - - Parameters - ---------- - other : Series - func : binary operator - fill_value : float or object - Value to substitute for NA/null values. 
If both Series are NA in a - location, the result will be NA regardless of the passed fill value. - level : int or level name, default None - Broadcast across a level, matching Index values on the - passed MultiIndex level. - - Returns - ------- - Series - """ - if not isinstance(other, SeriesOps): - raise AssertionError("Other operand must be Series") - - this = self - - if not self.index.equals(other.index): - # error: "SeriesOps" has no attribute "align" - this, other = self.align( # type: ignore[attr-defined] - other, level=level, join="outer", copy=False - ) - - this_vals, other_vals = fill_binop(this._values, other._values, fill_value) - - with np.errstate(all="ignore"): - result = func(this_vals, other_vals) - - name = get_op_result_name(self, other) - return this._construct_result(result, name) - - def _construct_result( - self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable - ) -> Series | tuple[Series, Series]: - """ - Construct an appropriately-labelled Series from the result of an op. - - Parameters - ---------- - result : ndarray or ExtensionArray - name : Label - - Returns - ------- - Series - In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. 
- """ - if isinstance(result, tuple): - # produced by divmod or rdivmod - - res1 = self._construct_result(result[0], name=name) - res2 = self._construct_result(result[1], name=name) - - # GH#33427 assertions to keep mypy happy - assert isinstance(res1, SeriesOps) - assert isinstance(res2, SeriesOps) - return (res1, res2) - - # TODO: result should always be ArrayLike, but this fails for some - # JSONArray tests - dtype = getattr(result, "dtype", None) - out = self._constructor( - result, - index=self.index, - dtype=dtype, - ) - out = out.__finalize__(self) - - # Set the result's name after __finalize__ is called because __finalize__ - # would set it back to self.name - out.name = name # pyright: ignore[reportGeneralTypeIssues] - return out - - def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0): - if axis is not None: - self._get_axis_number(axis) - - res_name = get_op_result_name(self, other) - - if isinstance(other, SeriesOps): - return self._binop(other, op, level=level, fill_value=fill_value) - elif isinstance(other, (np.ndarray, list, tuple)): - # error: Argument 1 to "len" has incompatible type "SeriesOps"; - # expected "Sized" - if len(other) != len(self): # type: ignore[arg-type] - raise ValueError("Lengths must be equal") - other = self._constructor(other, self.index) - result = self._binop(other, op, level=level, fill_value=fill_value) - result.name = res_name - return result - else: - if fill_value is not None: - # error: "SeriesOps" has no attribute "fillna" - self = self.fillna(fill_value) # type: ignore[attr-defined] - - return op(self, other) - - @Appender(make_flex_doc("eq", "series")) - def eq(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.eq, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("ne", "series")) - def ne(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.ne, 
level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("le", "series")) - def le(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.le, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("lt", "series")) - def lt(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.lt, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("ge", "series")) - def ge(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.ge, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("gt", "series")) - def gt(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.gt, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("add", "series")) - def add(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.add, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("radd", "series")) - def radd(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, roperator.radd, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("sub", "series")) - def sub(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.sub, level=level, fill_value=fill_value, axis=axis - ) - - subtract = sub - - @Appender(make_flex_doc("rsub", "series")) - def rsub(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, roperator.rsub, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("mul", "series")) - def mul(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.mul, level=level, fill_value=fill_value, 
axis=axis - ) - - multiply = mul - - @Appender(make_flex_doc("rmul", "series")) - def rmul(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, roperator.rmul, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("truediv", "series")) - def truediv(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.truediv, level=level, fill_value=fill_value, axis=axis - ) - - div = truediv - divide = truediv - - @Appender(make_flex_doc("rtruediv", "series")) - def rtruediv(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis - ) - - rdiv = rtruediv - - @Appender(make_flex_doc("floordiv", "series")) - def floordiv(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.floordiv, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("rfloordiv", "series")) - def rfloordiv(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("mod", "series")) - def mod(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.mod, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("rmod", "series")) - def rmod(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, roperator.rmod, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("pow", "series")) - def pow(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, operator.pow, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("rpow", "series")) - def rpow(self, other, level=None, fill_value=None, axis: Axis 
= 0): - return self._flex_method( - other, roperator.rpow, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("divmod", "series")) - def divmod(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, divmod, level=level, fill_value=fill_value, axis=axis - ) - - @Appender(make_flex_doc("rdivmod", "series")) - def rdivmod(self, other, level=None, fill_value=None, axis: Axis = 0): - return self._flex_method( - other, roperator.rdivmod, level=level, fill_value=fill_value, axis=axis - ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3ee1f312be8f4..6f22355d59676 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3,6 +3,7 @@ """ from __future__ import annotations +import operator import sys from textwrap import dedent from typing import ( @@ -91,13 +92,17 @@ missing, nanops, ops, + roperator, ) from pandas.core.accessor import CachedAccessor from pandas.core.apply import SeriesApply from pandas.core.arrays import ExtensionArray from pandas.core.arrays.categorical import CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor -from pandas.core.construction import sanitize_array +from pandas.core.construction import ( + extract_array, + sanitize_array, +) from pandas.core.generic import NDFrame from pandas.core.indexers import ( disallow_ndim_indexing, @@ -123,7 +128,6 @@ SingleBlockManager, ) from pandas.core.methods import selectn -from pandas.core.ops.methods import SeriesOps from pandas.core.shared_docs import _shared_docs from pandas.core.sorting import ( ensure_key_mapped, @@ -237,7 +241,7 @@ def wrapper(self): # definition in base class "NDFrame" # error: Definition of "min" in base class "IndexOpsMixin" is incompatible with # definition in base class "NDFrame" -class Series(SeriesOps, base.IndexOpsMixin, NDFrame): # type: ignore[misc] +class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] """ One-dimensional ndarray with axis labels 
(including time series). @@ -552,8 +556,7 @@ def _init_dict( # ---------------------------------------------------------------------- @property - # error: Cannot override writeable attribute with read-only property - def _constructor(self) -> Callable[..., Series]: # type: ignore[override] + def _constructor(self) -> Callable[..., Series]: return Series @property @@ -5932,5 +5935,291 @@ def mask( # Add plotting methods to Series hist = pandas.plotting.hist_series + # ---------------------------------------------------------------------- + # Template-Based Arithmetic/Comparison Methods + + def _cmp_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + + if isinstance(other, Series) and not self._indexed_same(other): + raise ValueError("Can only compare identically-labeled Series objects") + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + + with np.errstate(all="ignore"): + res_values = ops.comparison_op(lvalues, rvalues, op) + + return self._construct_result(res_values, name=res_name) + + def _logical_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + self, other = self._align_for_op(other, align_asobject=True) + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + + res_values = ops.logical_op(lvalues, rvalues, op) + return self._construct_result(res_values, name=res_name) + + def _arith_method(self, other, op): + self, other = self._align_for_op(other) + return base.IndexOpsMixin._arith_method(self, other, op) + + def _align_for_op(self, right, align_asobject: bool = False): + """align lhs and rhs Series""" + # TODO: Different from DataFrame._align_for_op, list, tuple and ndarray + # are not coerced here + # because Series has inconsistencies described in GH#13637 + left = self + + if isinstance(right, Series): + # avoid repeated alignment + if not left.index.equals(right.index): + if align_asobject: + # to keep original 
value's dtype for bool ops + left = left.astype(object) + right = right.astype(object) + + left, right = left.align(right, copy=False) + + return left, right + + def _binop(self, other: Series, func, level=None, fill_value=None) -> Series: + """ + Perform generic binary operation with optional fill value. + + Parameters + ---------- + other : Series + func : binary operator + fill_value : float or object + Value to substitute for NA/null values. If both Series are NA in a + location, the result will be NA regardless of the passed fill value. + level : int or level name, default None + Broadcast across a level, matching Index values on the + passed MultiIndex level. + + Returns + ------- + Series + """ + if not isinstance(other, Series): + raise AssertionError("Other operand must be Series") + + this = self + + if not self.index.equals(other.index): + this, other = self.align(other, level=level, join="outer", copy=False) + + this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value) + + with np.errstate(all="ignore"): + result = func(this_vals, other_vals) + + name = ops.get_op_result_name(self, other) + out = this._construct_result(result, name) + return cast(Series, out) + + def _construct_result( + self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable + ) -> Series | tuple[Series, Series]: + """ + Construct an appropriately-labelled Series from the result of an op. + + Parameters + ---------- + result : ndarray or ExtensionArray + name : Label + + Returns + ------- + Series + In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. 
+ """ + if isinstance(result, tuple): + # produced by divmod or rdivmod + + res1 = self._construct_result(result[0], name=name) + res2 = self._construct_result(result[1], name=name) + + # GH#33427 assertions to keep mypy happy + assert isinstance(res1, Series) + assert isinstance(res2, Series) + return (res1, res2) + + # TODO: result should always be ArrayLike, but this fails for some + # JSONArray tests + dtype = getattr(result, "dtype", None) + out = self._constructor(result, index=self.index, dtype=dtype) + out = out.__finalize__(self) + + # Set the result's name after __finalize__ is called because __finalize__ + # would set it back to self.name + out.name = name + return out + + def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0): + if axis is not None: + self._get_axis_number(axis) + + res_name = ops.get_op_result_name(self, other) + + if isinstance(other, Series): + return self._binop(other, op, level=level, fill_value=fill_value) + elif isinstance(other, (np.ndarray, list, tuple)): + if len(other) != len(self): + raise ValueError("Lengths must be equal") + other = self._constructor(other, self.index) + result = self._binop(other, op, level=level, fill_value=fill_value) + result.name = res_name + return result + else: + if fill_value is not None: + self = self.fillna(fill_value) + + return op(self, other) + + @Appender(ops.make_flex_doc("eq", "series")) + def eq(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.eq, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("ne", "series")) + def ne(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.ne, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("le", "series")) + def le(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.le, level=level, 
fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("lt", "series")) + def lt(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.lt, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("ge", "series")) + def ge(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.ge, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("gt", "series")) + def gt(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.gt, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("add", "series")) + def add(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.add, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("radd", "series")) + def radd(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.radd, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("sub", "series")) + def sub(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.sub, level=level, fill_value=fill_value, axis=axis + ) + + subtract = sub + + @Appender(ops.make_flex_doc("rsub", "series")) + def rsub(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rsub, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("mul", "series")) + def mul(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.mul, level=level, fill_value=fill_value, axis=axis + ) + + multiply = mul + + @Appender(ops.make_flex_doc("rmul", "series")) + def rmul(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, 
roperator.rmul, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("truediv", "series")) + def truediv(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.truediv, level=level, fill_value=fill_value, axis=axis + ) + + div = truediv + divide = truediv + + @Appender(ops.make_flex_doc("rtruediv", "series")) + def rtruediv(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis + ) + + rdiv = rtruediv + + @Appender(ops.make_flex_doc("floordiv", "series")) + def floordiv(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.floordiv, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("rfloordiv", "series")) + def rfloordiv(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("mod", "series")) + def mod(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.mod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("rmod", "series")) + def rmod(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rmod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("pow", "series")) + def pow(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, operator.pow, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("rpow", "series")) + def rpow(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rpow, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("divmod", 
"series")) + def divmod(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, divmod, level=level, fill_value=fill_value, axis=axis + ) + + @Appender(ops.make_flex_doc("rdivmod", "series")) + def rdivmod(self, other, level=None, fill_value=None, axis: Axis = 0): + return self._flex_method( + other, roperator.rdivmod, level=level, fill_value=fill_value, axis=axis + ) + Series._add_numeric_operations() From bcde52927ce1df70152dbaa230a5ba14de6c6939 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Mar 2023 08:49:57 -0800 Subject: [PATCH 10/10] fix doctest --- pandas/core/ops/docstrings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index 37decdedeb90e..9a469169151c3 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -176,8 +176,8 @@ def make_flex_doc(op_name: str, typ: str) -> str: + """ >>> a.divmod(b, fill_value=0) (a 1.0 - b NaN - c NaN + b inf + c inf d 0.0 e NaN dtype: float64,