Skip to content

Sparse Ops Cleanup #19782

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3995,7 +3995,7 @@ def _combine_const(self, other, func, errors='raise', try_cast=True):
try_cast=try_cast)
return self._constructor(new_data)

def _compare_frame(self, other, func, str_rep, try_cast=True):
def _compare_frame(self, other, func, str_rep):
# compare_frame assumes self._indexed_same(other)

import pandas.core.computation.expressions as expressions
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,7 @@ def insert(self, loc, item):

def delete(self, loc):
"""
Make a new DatetimeIndex with passed location(s) deleted.
Make a new TimedeltaIndex with passed location(s) deleted.

Parameters
----------
Expand Down
45 changes: 19 additions & 26 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,9 +721,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None):
subtract=new_methods['sub'],
divide=new_methods['div']))
# opt out of bool flex methods for now
for k in ('ror_', 'rxor', 'rand_'):
if k in new_methods:
new_methods.pop(k)
assert not any(kname in new_methods for kname in ('ror_', 'rxor', 'rand_'))

add_methods(cls, new_methods=new_methods)

Expand Down Expand Up @@ -1080,19 +1078,19 @@ def na_op(x, y):
try:
result = lib.scalar_binop(x, y, op)
except:
msg = ("cannot compare a dtyped [{dtype}] array "
"with a scalar of type [{type}]"
).format(dtype=x.dtype, type=type(y).__name__)
raise TypeError(msg)
raise TypeError("cannot compare a dtyped [{dtype}] array "
"with a scalar of type [{typ}]"
.format(dtype=x.dtype,
typ=type(y).__name__))

return result

fill_int = lambda x: x.fillna(0)
fill_bool = lambda x: x.fillna(False).astype(bool)

def wrapper(self, other):
is_self_int_dtype = is_integer_dtype(self.dtype)

fill_int = lambda x: x.fillna(0)
fill_bool = lambda x: x.fillna(False).astype(bool)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving these outside of the function scope to clarify they don't need to be re-defined on each call. _bool_method_SERIES is the topic of a separate PR, but this is sufficiently small to include in assorted-cleanups.


self, other = _align_method_SERIES(self, other, align_asobject=True)

if isinstance(other, ABCDataFrame):
Expand Down Expand Up @@ -1232,10 +1230,10 @@ def to_series(right):

elif right.ndim == 2:
if left.shape != right.shape:
msg = ("Unable to coerce to DataFrame, shape "
"must be {req_shape}: given {given_shape}"
).format(req_shape=left.shape, given_shape=right.shape)
raise ValueError(msg)
raise ValueError("Unable to coerce to DataFrame, shape "
"must be {req_shape}: given {given_shape}"
.format(req_shape=left.shape,
given_shape=right.shape))

right = left._constructor(right, index=left.index,
columns=left.columns)
Expand Down Expand Up @@ -1293,8 +1291,8 @@ def na_op(x, y):
result[mask] = op(xrav, y)
else:
raise TypeError("cannot perform operation {op} between "
"objects of type {x} and {y}".format(
op=name, x=type(x), y=type(y)))
"objects of type {x} and {y}"
.format(op=name, x=type(x), y=type(y)))

result, changed = maybe_upcast_putmask(result, ~mask, np.nan)
result = result.reshape(x.shape)
Expand Down Expand Up @@ -1355,7 +1353,7 @@ def f(self, other, axis=default_axis, level=None):
if not self._indexed_same(other):
self, other = self.align(other, 'outer',
level=level, copy=False)
return self._compare_frame(other, na_op, str_rep, try_cast=False)
return self._compare_frame(other, na_op, str_rep)

elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, na_op,
Expand All @@ -1380,7 +1378,7 @@ def f(self, other):
if not self._indexed_same(other):
raise ValueError('Can only compare identically-labeled '
'DataFrame objects')
return self._compare_frame(other, func, str_rep, try_cast=True)
return self._compare_frame(other, func, str_rep)

elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, func,
Expand Down Expand Up @@ -1532,10 +1530,6 @@ def wrapper(self, other):
.format(other=type(other)))

wrapper.__name__ = name
if name.startswith("__"):
# strip special method names, e.g. `__add__` needs to be `add` when
# passed to _sparse_series_op
name = name[2:-2]
return wrapper


Expand Down Expand Up @@ -1568,7 +1562,7 @@ def wrapper(self, other):
dtype = getattr(other, 'dtype', None)
other = SparseArray(other, fill_value=self.fill_value,
dtype=dtype)
return _sparse_array_op(self, other, op, name)
return _sparse_array_op(self, other, op, name, series=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this whole sparse interaction is pretty hacky (I know not addressing this now), but....

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yah, there are a couple of improvements in store for the pass after next (which is just implementing the single-call ops.add_whatever_methods(cls))

elif is_scalar(other):
with np.errstate(all='ignore'):
fill = op(_get_fill(self), np.asarray(other))
Expand All @@ -1579,8 +1573,6 @@ def wrapper(self, other):
raise TypeError('operation with {other} not supported'
.format(other=type(other)))

if name.startswith("__"):
name = name[2:-2]
wrapper.__name__ = name
return wrapper

Expand All @@ -1591,4 +1583,5 @@ def wrapper(self, other):

sparse_series_special_funcs = dict(arith_method=_arith_method_SPARSE_SERIES,
comp_method=_arith_method_SPARSE_SERIES,
bool_method=None)
bool_method=_bool_method_SERIES)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we hold off adding till then

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is necessary to get rid of the double-call in sparse.series.

# TODO: I don't think the functions defined by bool_method are tested
7 changes: 7 additions & 0 deletions pandas/core/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def _get_fill(arr):


def _sparse_array_op(left, right, op, name, series=False):
if name.startswith('__'):
# For lookups in _libs.sparse we need non-dunder op name
name = name[2:-2]

if series and is_integer_dtype(left) and is_integer_dtype(right):
# series coerces to float64 if result should have NaN/inf
Expand Down Expand Up @@ -119,6 +122,10 @@ def _sparse_array_op(left, right, op, name, series=False):

def _wrap_result(name, data, sparse_index, fill_value, dtype=None):
""" wrap op result to have correct dtype """
if name.startswith('__'):
# e.g. __eq__ --> eq
name = name[2:-2]

if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
dtype = np.bool

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,6 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
return self._constructor(index=new_index).__finalize__(self)

new_data = {}
new_fill_value = None
if fill_value is not None:
# TODO: be a bit more intelligent here
for col in new_columns:
Expand All @@ -568,6 +567,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
new_data[col] = func(this[col], other[col])

# if the fill values are the same use them? or use a valid one
new_fill_value = None
other_fill_value = getattr(other, 'default_fill_value', np.nan)
if self.default_fill_value == other_fill_value:
new_fill_value = self.default_fill_value
Expand Down
5 changes: 1 addition & 4 deletions pandas/core/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,10 +811,7 @@ def from_coo(cls, A, dense_index=False):
return _coo_to_sparse_series(A, dense_index=dense_index)


# overwrite series methods with unaccelerated versions
ops.add_special_arithmetic_methods(SparseSeries, **ops.series_special_funcs)
# overwrite series methods with unaccelerated Sparse-specific versions
ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs)
# overwrite basic arithmetic to use SparseSeries version
# force methods to overwrite previous definitions.
ops.add_special_arithmetic_methods(SparseSeries,
**ops.sparse_series_special_funcs)