-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Sparse Ops Cleanup #19782
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sparse Ops Cleanup #19782
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -721,9 +721,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None): | |
subtract=new_methods['sub'], | ||
divide=new_methods['div'])) | ||
# opt out of bool flex methods for now | ||
for k in ('ror_', 'rxor', 'rand_'): | ||
if k in new_methods: | ||
new_methods.pop(k) | ||
assert not any(kname in new_methods for kname in ('ror_', 'rxor', 'rand_')) | ||
|
||
add_methods(cls, new_methods=new_methods) | ||
|
||
|
@@ -1080,19 +1078,19 @@ def na_op(x, y): | |
try: | ||
result = lib.scalar_binop(x, y, op) | ||
except: | ||
msg = ("cannot compare a dtyped [{dtype}] array " | ||
"with a scalar of type [{type}]" | ||
).format(dtype=x.dtype, type=type(y).__name__) | ||
raise TypeError(msg) | ||
raise TypeError("cannot compare a dtyped [{dtype}] array " | ||
"with a scalar of type [{typ}]" | ||
.format(dtype=x.dtype, | ||
typ=type(y).__name__)) | ||
|
||
return result | ||
|
||
fill_int = lambda x: x.fillna(0) | ||
fill_bool = lambda x: x.fillna(False).astype(bool) | ||
|
||
def wrapper(self, other): | ||
is_self_int_dtype = is_integer_dtype(self.dtype) | ||
|
||
fill_int = lambda x: x.fillna(0) | ||
fill_bool = lambda x: x.fillna(False).astype(bool) | ||
|
||
self, other = _align_method_SERIES(self, other, align_asobject=True) | ||
|
||
if isinstance(other, ABCDataFrame): | ||
|
@@ -1232,10 +1230,10 @@ def to_series(right): | |
|
||
elif right.ndim == 2: | ||
if left.shape != right.shape: | ||
msg = ("Unable to coerce to DataFrame, shape " | ||
"must be {req_shape}: given {given_shape}" | ||
).format(req_shape=left.shape, given_shape=right.shape) | ||
raise ValueError(msg) | ||
raise ValueError("Unable to coerce to DataFrame, shape " | ||
"must be {req_shape}: given {given_shape}" | ||
.format(req_shape=left.shape, | ||
given_shape=right.shape)) | ||
|
||
right = left._constructor(right, index=left.index, | ||
columns=left.columns) | ||
|
@@ -1293,8 +1291,8 @@ def na_op(x, y): | |
result[mask] = op(xrav, y) | ||
else: | ||
raise TypeError("cannot perform operation {op} between " | ||
"objects of type {x} and {y}".format( | ||
op=name, x=type(x), y=type(y))) | ||
"objects of type {x} and {y}" | ||
.format(op=name, x=type(x), y=type(y))) | ||
|
||
result, changed = maybe_upcast_putmask(result, ~mask, np.nan) | ||
result = result.reshape(x.shape) | ||
|
@@ -1355,7 +1353,7 @@ def f(self, other, axis=default_axis, level=None): | |
if not self._indexed_same(other): | ||
self, other = self.align(other, 'outer', | ||
level=level, copy=False) | ||
return self._compare_frame(other, na_op, str_rep, try_cast=False) | ||
return self._compare_frame(other, na_op, str_rep) | ||
|
||
elif isinstance(other, ABCSeries): | ||
return _combine_series_frame(self, other, na_op, | ||
|
@@ -1380,7 +1378,7 @@ def f(self, other): | |
if not self._indexed_same(other): | ||
raise ValueError('Can only compare identically-labeled ' | ||
'DataFrame objects') | ||
return self._compare_frame(other, func, str_rep, try_cast=True) | ||
return self._compare_frame(other, func, str_rep) | ||
|
||
elif isinstance(other, ABCSeries): | ||
return _combine_series_frame(self, other, func, | ||
|
@@ -1532,10 +1530,6 @@ def wrapper(self, other): | |
.format(other=type(other))) | ||
|
||
wrapper.__name__ = name | ||
if name.startswith("__"): | ||
# strip special method names, e.g. `__add__` needs to be `add` when | ||
# passed to _sparse_series_op | ||
name = name[2:-2] | ||
return wrapper | ||
|
||
|
||
|
@@ -1568,7 +1562,7 @@ def wrapper(self, other): | |
dtype = getattr(other, 'dtype', None) | ||
other = SparseArray(other, fill_value=self.fill_value, | ||
dtype=dtype) | ||
return _sparse_array_op(self, other, op, name) | ||
return _sparse_array_op(self, other, op, name, series=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This whole sparse interaction is pretty hacky (I know we are not addressing this now), but... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, there are a couple of improvements in store for the pass after next (which is just implementing the single-call |
||
elif is_scalar(other): | ||
with np.errstate(all='ignore'): | ||
fill = op(_get_fill(self), np.asarray(other)) | ||
|
@@ -1579,8 +1573,6 @@ def wrapper(self, other): | |
raise TypeError('operation with {other} not supported' | ||
.format(other=type(other))) | ||
|
||
if name.startswith("__"): | ||
name = name[2:-2] | ||
wrapper.__name__ = name | ||
return wrapper | ||
|
||
|
@@ -1591,4 +1583,5 @@ def wrapper(self, other): | |
|
||
sparse_series_special_funcs = dict(arith_method=_arith_method_SPARSE_SERIES, | ||
comp_method=_arith_method_SPARSE_SERIES, | ||
bool_method=None) | ||
bool_method=_bool_method_SERIES) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we hold off on adding this till then? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is necessary to get rid of the double-call in sparse.series. |
||
# TODO: I don't think the functions defined by bool_method are tested |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moving these (the `fill_int` and `fill_bool` lambdas) outside of the function scope to clarify that they don't need to be re-defined on each call. `_bool_method_SERIES` is the topic of a separate PR, but this change is sufficiently small to include in assorted-cleanups.