Skip to content

make ops.add_foo take just class #19828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 26, 2018
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6106,8 +6106,8 @@ def isin(self, values):
DataFrame._add_numeric_operations()
DataFrame._add_series_or_dataframe_operations()

ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs)
ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs)
ops.add_flex_arithmetic_methods(DataFrame)
ops.add_special_arithmetic_methods(DataFrame)


def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
Expand Down
204 changes: 135 additions & 69 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
construct_1d_object_array_from_listlike)
from pandas.core.dtypes.generic import (
ABCSeries,
ABCDataFrame,
ABCDataFrame, ABCPanel,
ABCIndex,
ABCSparseSeries, ABCSparseArray)

Expand Down Expand Up @@ -567,6 +567,84 @@ def mask_cmp_op(x, y, op, allowed_types):
# Functions that add arithmetic methods to objects, given arithmetic factory
# methods

def _get_flex_wrappers(cls):
"""
Find the appropriate operation-wrappers to use when defining flex
arithmetic or boolean operations with the given class.

Parameters
----------
cls : class

Returns
-------
arith_method : function or None
comp_method : function or None

Notes
-----
None is only returned for SparseArray
"""
if issubclass(cls, ABCSeries):
# Same for Series and SparseSeries
# Note this check must come before check for ABCSparseArray
# or else if will incorrectly catch SparseSeries.
arith_method = _flex_method_SERIES
comp_method = _flex_method_SERIES
elif issubclass(cls, ABCSparseArray):
arith_method = None
comp_method = None
elif issubclass(cls, ABCPanel):
arith_method = _flex_method_PANEL
comp_method = _comp_method_PANEL
elif issubclass(cls, ABCDataFrame):
# Same for DataFrame and SparseDataFrame
arith_method = _arith_method_FRAME
comp_method = _flex_comp_method_FRAME
return arith_method, comp_method


def _get_special_wrappers(cls):
"""
Find the appropriate operation-wrappers to use when defining special
arithmetic, comparison, or boolean operations with the given class.

Parameters
----------
cls : class

Returns
-------
arith_method : function
comp_method : function
bool_method : function
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better just have a dict here keyed on the cls and just returns the 5 methods. This seems a bit overkill

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implementing this it ends up being pretty close to a wash. We have to key on cls._typ most of the time but also check for cls._subtyp for SparseSeries and SparseDataFrame. At this point I'm leaning towards "being a little bit verbose here is OK in this case". The main upside is its helpful to see which wrappers are used where without having to jump around the file.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, can do in a single function then

"""
if issubclass(cls, ABCSparseSeries):
# Be sure to catch this before ABCSeries and ABCSparseArray,
# as they will both come see SparseSeries as a subclass
arith_method = _arith_method_SPARSE_SERIES
comp_method = _arith_method_SPARSE_SERIES
bool_method = _bool_method_SERIES
# TODO: I don't think the functions defined by bool_method are tested
elif issubclass(cls, ABCSparseArray):
arith_method = _arith_method_SPARSE_ARRAY
comp_method = _arith_method_SPARSE_ARRAY
bool_method = _arith_method_SPARSE_ARRAY
elif issubclass(cls, ABCPanel):
arith_method = _arith_method_PANEL
comp_method = _comp_method_PANEL
bool_method = _arith_method_PANEL
elif issubclass(cls, ABCDataFrame):
# Same for DataFrame and SparseDataFrame
arith_method = _arith_method_FRAME
comp_method = _comp_method_FRAME
bool_method = _arith_method_FRAME
elif issubclass(cls, ABCSeries):
arith_method = _arith_method_SERIES
comp_method = _comp_method_SERIES
bool_method = _bool_method_SERIES
return arith_method, comp_method, bool_method


def _create_methods(cls, arith_method, comp_method, bool_method,
special=False):
Expand Down Expand Up @@ -599,16 +677,18 @@ def _create_methods(cls, arith_method, comp_method, bool_method,
# yapf: enable
new_methods['div'] = new_methods['truediv']
new_methods['rdiv'] = new_methods['rtruediv']
if have_divmod:
# divmod doesn't have an op that is supported by numexpr
new_methods['divmod'] = arith_method(cls, divmod, special)

new_methods.update(dict(
eq=comp_method(cls, operator.eq, special),
ne=comp_method(cls, operator.ne, special),
lt=comp_method(cls, operator.lt, special),
gt=comp_method(cls, operator.gt, special),
le=comp_method(cls, operator.le, special),
ge=comp_method(cls, operator.ge, special)))

# Comp methods never had a default axis set
if comp_method:
new_methods.update(dict(
eq=comp_method(cls, operator.eq, special),
ne=comp_method(cls, operator.ne, special),
lt=comp_method(cls, operator.lt, special),
gt=comp_method(cls, operator.gt, special),
le=comp_method(cls, operator.le, special),
ge=comp_method(cls, operator.ge, special)))
if bool_method:
new_methods.update(
dict(and_=bool_method(cls, operator.and_, special),
Expand All @@ -618,9 +698,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method,
rand_=bool_method(cls, rand_, special),
ror_=bool_method(cls, ror_, special),
rxor=bool_method(cls, rxor, special)))
if have_divmod:
# divmod doesn't have an op that is supported by numexpr
new_methods['divmod'] = arith_method(cls, divmod, special)

if special:
dunderize = lambda x: '__{name}__'.format(name=x.strip('_'))
Expand All @@ -644,22 +721,17 @@ def add_methods(cls, new_methods):

# ----------------------------------------------------------------------
# Arithmetic
def add_special_arithmetic_methods(cls, arith_method=None,
comp_method=None, bool_method=None):
def add_special_arithmetic_methods(cls):
"""
Adds the full suite of special arithmetic methods (``__add__``,
``__sub__``, etc.) to the class.

Parameters
----------
arith_method : function (optional)
factory for special arithmetic methods:
f(cls, op, special)
comp_method : function (optional)
factory for rich comparison - signature: f(cls, op, special)
bool_method : function (optional)
factory for boolean methods - signature: f(cls, op, special)
cls : class
special methods will be defined and pinned to this class
"""
arith_method, comp_method, bool_method = _get_special_wrappers(cls)
new_methods = _create_methods(cls, arith_method, comp_method, bool_method,
special=True)
# inplace operators (I feel like these should get passed an `inplace=True`
Expand Down Expand Up @@ -692,28 +764,26 @@ def f(self, other):
__ipow__=_wrap_inplace_method(new_methods["__pow__"])))
if not compat.PY3:
new_methods["__idiv__"] = _wrap_inplace_method(new_methods["__div__"])
if bool_method:
new_methods.update(
dict(__iand__=_wrap_inplace_method(new_methods["__and__"]),
__ior__=_wrap_inplace_method(new_methods["__or__"]),
__ixor__=_wrap_inplace_method(new_methods["__xor__"])))

new_methods.update(
dict(__iand__=_wrap_inplace_method(new_methods["__and__"]),
__ior__=_wrap_inplace_method(new_methods["__or__"]),
__ixor__=_wrap_inplace_method(new_methods["__xor__"])))

add_methods(cls, new_methods=new_methods)


def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None):
def add_flex_arithmetic_methods(cls):
"""
Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``)
to the class.

Parameters
----------
flex_arith_method : function
factory for flex arithmetic methods:
f(cls, op, special)
flex_comp_method : function, optional,
factory for rich comparison - signature: f(cls, op, special)
cls : class
flex methods will be defined and pinned to this class
"""
flex_arith_method, flex_comp_method = _get_flex_wrappers(cls)
new_methods = _create_methods(cls, flex_arith_method,
flex_comp_method, bool_method=None,
special=False)
Expand Down Expand Up @@ -1149,14 +1219,6 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
return flex_wrapper


series_flex_funcs = dict(flex_arith_method=_flex_method_SERIES,
flex_comp_method=_flex_method_SERIES)

series_special_funcs = dict(arith_method=_arith_method_SERIES,
comp_method=_comp_method_SERIES,
bool_method=_bool_method_SERIES)


# -----------------------------------------------------------------------------
# DataFrame

Expand Down Expand Up @@ -1398,14 +1460,6 @@ def f(self, other):
return f


frame_flex_funcs = dict(flex_arith_method=_arith_method_FRAME,
flex_comp_method=_flex_comp_method_FRAME)

frame_special_funcs = dict(arith_method=_arith_method_FRAME,
comp_method=_comp_method_FRAME,
bool_method=_arith_method_FRAME)


# -----------------------------------------------------------------------------
# Panel

Expand Down Expand Up @@ -1494,16 +1548,38 @@ def f(self, other, axis=0):
return f


panel_special_funcs = dict(arith_method=_arith_method_PANEL,
comp_method=_comp_method_PANEL,
bool_method=_arith_method_PANEL)

panel_flex_funcs = dict(flex_arith_method=_flex_method_PANEL,
flex_comp_method=_comp_method_PANEL)

# -----------------------------------------------------------------------------
# Sparse

def _cast_sparse_series_op(left, right, opname):
"""
For SparseSeries operation, coerce to float64 if the result is expected
to have NaN or inf values

Parameters
----------
left : SparseArray
right : SparseArray
opname : str

Returns
-------
left : SparseArray
right : SparseArray
"""
opname = opname.strip('_')

if is_integer_dtype(left) and is_integer_dtype(right):
# series coerces to float64 if result should have NaN/inf
if opname in ('floordiv', 'mod') and (right.values == 0).any():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should go thru the fill_zeros path.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this obvious? I don't know the sparse code all that well, have been assuming that the do-it-upfront approach taken here was for a reason.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There will need to be a pass soon to make Series division handle division by zero the way that Index division now does. That might be a reasonable time to take a close look at this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok pls add it to the list.

left = left.astype(np.float64)
right = right.astype(np.float64)
elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any():
left = left.astype(np.float64)
right = right.astype(np.float64)

return left, right


def _arith_method_SPARSE_SERIES(cls, op, special):
"""
Expand Down Expand Up @@ -1539,8 +1615,8 @@ def _sparse_series_op(left, right, op, name):
new_name = com._maybe_match_name(left, right)

from pandas.core.sparse.array import _sparse_array_op
result = _sparse_array_op(left.values, right.values, op, name,
series=True)
lvalues, rvalues = _cast_sparse_series_op(left.values, right.values, name)
result = _sparse_array_op(lvalues, rvalues, op, name)
return left._constructor(result, index=new_index, name=new_name)


Expand All @@ -1562,7 +1638,7 @@ def wrapper(self, other):
dtype = getattr(other, 'dtype', None)
other = SparseArray(other, fill_value=self.fill_value,
dtype=dtype)
return _sparse_array_op(self, other, op, name, series=False)
return _sparse_array_op(self, other, op, name)
elif is_scalar(other):
with np.errstate(all='ignore'):
fill = op(_get_fill(self), np.asarray(other))
Expand All @@ -1575,13 +1651,3 @@ def wrapper(self, other):

wrapper.__name__ = name
return wrapper


sparse_array_special_funcs = dict(arith_method=_arith_method_SPARSE_ARRAY,
comp_method=_arith_method_SPARSE_ARRAY,
bool_method=_arith_method_SPARSE_ARRAY)

sparse_series_special_funcs = dict(arith_method=_arith_method_SPARSE_SERIES,
comp_method=_arith_method_SPARSE_SERIES,
bool_method=_bool_method_SERIES)
# TODO: I don't think the functions defined by bool_method are tested
4 changes: 2 additions & 2 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1527,8 +1527,8 @@ def _extract_axis(self, data, axis=0, intersect=False):
slicers={'major_axis': 'index',
'minor_axis': 'columns'})

ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs)
ops.add_flex_arithmetic_methods(Panel, **ops.panel_flex_funcs)
ops.add_special_arithmetic_methods(Panel)
ops.add_flex_arithmetic_methods(Panel)
Panel._add_numeric_operations()


Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3092,8 +3092,8 @@ def to_period(self, freq=None, copy=True):
Series._add_series_or_dataframe_operations()

# Add arithmetic!
ops.add_flex_arithmetic_methods(Series, **ops.series_flex_funcs)
ops.add_special_arithmetic_methods(Series, **ops.series_special_funcs)
ops.add_flex_arithmetic_methods(Series)
ops.add_special_arithmetic_methods(Series)


# -----------------------------------------------------------------------------
Expand Down
14 changes: 2 additions & 12 deletions pandas/core/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,11 @@ def _get_fill(arr):
return np.asarray(arr.fill_value)


def _sparse_array_op(left, right, op, name, series=False):
def _sparse_array_op(left, right, op, name):
if name.startswith('__'):
# For lookups in _libs.sparse we need non-dunder op name
name = name[2:-2]

if series and is_integer_dtype(left) and is_integer_dtype(right):
# series coerces to float64 if result should have NaN/inf
if name in ('floordiv', 'mod') and (right.values == 0).any():
left = left.astype(np.float64)
right = right.astype(np.float64)
elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
left = left.astype(np.float64)
right = right.astype(np.float64)

# dtype used to find corresponding sparse method
if not is_dtype_equal(left.dtype, right.dtype):
dtype = find_common_type([left.dtype, right.dtype])
Expand Down Expand Up @@ -850,5 +841,4 @@ def _make_index(length, indices, kind):
return index


ops.add_special_arithmetic_methods(SparseArray,
**ops.sparse_array_special_funcs)
ops.add_special_arithmetic_methods(SparseArray)
4 changes: 2 additions & 2 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,5 +1014,5 @@ def homogenize(series_dict):


# use unaccelerated ops for sparse objects
ops.add_flex_arithmetic_methods(SparseDataFrame, **ops.frame_flex_funcs)
ops.add_special_arithmetic_methods(SparseDataFrame, **ops.frame_special_funcs)
ops.add_flex_arithmetic_methods(SparseDataFrame)
ops.add_special_arithmetic_methods(SparseDataFrame)
5 changes: 2 additions & 3 deletions pandas/core/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,5 @@ def from_coo(cls, A, dense_index=False):


# overwrite series methods with unaccelerated Sparse-specific versions
ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs)
ops.add_special_arithmetic_methods(SparseSeries,
**ops.sparse_series_special_funcs)
ops.add_flex_arithmetic_methods(SparseSeries)
ops.add_special_arithmetic_methods(SparseSeries)