Skip to content

Commit 1e3ff82

Browse files
jbrockmendel authored and harisbal committed
Sparse Ops Cleanup (pandas-dev#19782)
1 parent cca6300 commit 1e3ff82

File tree

6 files changed

+30
-33
lines changed

6 files changed

+30
-33
lines changed

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3995,7 +3995,7 @@ def _combine_const(self, other, func, errors='raise', try_cast=True):
39953995
try_cast=try_cast)
39963996
return self._constructor(new_data)
39973997

3998-
def _compare_frame(self, other, func, str_rep, try_cast=True):
3998+
def _compare_frame(self, other, func, str_rep):
39993999
# compare_frame assumes self._indexed_same(other)
40004000

40014001
import pandas.core.computation.expressions as expressions

pandas/core/indexes/timedeltas.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -928,7 +928,7 @@ def insert(self, loc, item):
928928

929929
def delete(self, loc):
930930
"""
931-
Make a new DatetimeIndex with passed location(s) deleted.
931+
Make a new TimedeltaIndex with passed location(s) deleted.
932932
933933
Parameters
934934
----------

pandas/core/ops.py

+19-26
Original file line numberDiff line numberDiff line change
@@ -721,9 +721,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None):
721721
subtract=new_methods['sub'],
722722
divide=new_methods['div']))
723723
# opt out of bool flex methods for now
724-
for k in ('ror_', 'rxor', 'rand_'):
725-
if k in new_methods:
726-
new_methods.pop(k)
724+
assert not any(kname in new_methods for kname in ('ror_', 'rxor', 'rand_'))
727725

728726
add_methods(cls, new_methods=new_methods)
729727

@@ -1080,19 +1078,19 @@ def na_op(x, y):
10801078
try:
10811079
result = lib.scalar_binop(x, y, op)
10821080
except:
1083-
msg = ("cannot compare a dtyped [{dtype}] array "
1084-
"with a scalar of type [{type}]"
1085-
).format(dtype=x.dtype, type=type(y).__name__)
1086-
raise TypeError(msg)
1081+
raise TypeError("cannot compare a dtyped [{dtype}] array "
1082+
"with a scalar of type [{typ}]"
1083+
.format(dtype=x.dtype,
1084+
typ=type(y).__name__))
10871085

10881086
return result
10891087

1088+
fill_int = lambda x: x.fillna(0)
1089+
fill_bool = lambda x: x.fillna(False).astype(bool)
1090+
10901091
def wrapper(self, other):
10911092
is_self_int_dtype = is_integer_dtype(self.dtype)
10921093

1093-
fill_int = lambda x: x.fillna(0)
1094-
fill_bool = lambda x: x.fillna(False).astype(bool)
1095-
10961094
self, other = _align_method_SERIES(self, other, align_asobject=True)
10971095

10981096
if isinstance(other, ABCDataFrame):
@@ -1232,10 +1230,10 @@ def to_series(right):
12321230

12331231
elif right.ndim == 2:
12341232
if left.shape != right.shape:
1235-
msg = ("Unable to coerce to DataFrame, shape "
1236-
"must be {req_shape}: given {given_shape}"
1237-
).format(req_shape=left.shape, given_shape=right.shape)
1238-
raise ValueError(msg)
1233+
raise ValueError("Unable to coerce to DataFrame, shape "
1234+
"must be {req_shape}: given {given_shape}"
1235+
.format(req_shape=left.shape,
1236+
given_shape=right.shape))
12391237

12401238
right = left._constructor(right, index=left.index,
12411239
columns=left.columns)
@@ -1293,8 +1291,8 @@ def na_op(x, y):
12931291
result[mask] = op(xrav, y)
12941292
else:
12951293
raise TypeError("cannot perform operation {op} between "
1296-
"objects of type {x} and {y}".format(
1297-
op=name, x=type(x), y=type(y)))
1294+
"objects of type {x} and {y}"
1295+
.format(op=name, x=type(x), y=type(y)))
12981296

12991297
result, changed = maybe_upcast_putmask(result, ~mask, np.nan)
13001298
result = result.reshape(x.shape)
@@ -1355,7 +1353,7 @@ def f(self, other, axis=default_axis, level=None):
13551353
if not self._indexed_same(other):
13561354
self, other = self.align(other, 'outer',
13571355
level=level, copy=False)
1358-
return self._compare_frame(other, na_op, str_rep, try_cast=False)
1356+
return self._compare_frame(other, na_op, str_rep)
13591357

13601358
elif isinstance(other, ABCSeries):
13611359
return _combine_series_frame(self, other, na_op,
@@ -1380,7 +1378,7 @@ def f(self, other):
13801378
if not self._indexed_same(other):
13811379
raise ValueError('Can only compare identically-labeled '
13821380
'DataFrame objects')
1383-
return self._compare_frame(other, func, str_rep, try_cast=True)
1381+
return self._compare_frame(other, func, str_rep)
13841382

13851383
elif isinstance(other, ABCSeries):
13861384
return _combine_series_frame(self, other, func,
@@ -1532,10 +1530,6 @@ def wrapper(self, other):
15321530
.format(other=type(other)))
15331531

15341532
wrapper.__name__ = name
1535-
if name.startswith("__"):
1536-
# strip special method names, e.g. `__add__` needs to be `add` when
1537-
# passed to _sparse_series_op
1538-
name = name[2:-2]
15391533
return wrapper
15401534

15411535

@@ -1568,7 +1562,7 @@ def wrapper(self, other):
15681562
dtype = getattr(other, 'dtype', None)
15691563
other = SparseArray(other, fill_value=self.fill_value,
15701564
dtype=dtype)
1571-
return _sparse_array_op(self, other, op, name)
1565+
return _sparse_array_op(self, other, op, name, series=False)
15721566
elif is_scalar(other):
15731567
with np.errstate(all='ignore'):
15741568
fill = op(_get_fill(self), np.asarray(other))
@@ -1579,8 +1573,6 @@ def wrapper(self, other):
15791573
raise TypeError('operation with {other} not supported'
15801574
.format(other=type(other)))
15811575

1582-
if name.startswith("__"):
1583-
name = name[2:-2]
15841576
wrapper.__name__ = name
15851577
return wrapper
15861578

@@ -1591,4 +1583,5 @@ def wrapper(self, other):
15911583

15921584
sparse_series_special_funcs = dict(arith_method=_arith_method_SPARSE_SERIES,
15931585
comp_method=_arith_method_SPARSE_SERIES,
1594-
bool_method=None)
1586+
bool_method=_bool_method_SERIES)
1587+
# TODO: I don't think the functions defined by bool_method are tested

pandas/core/sparse/array.py

+7
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ def _get_fill(arr):
5454

5555

5656
def _sparse_array_op(left, right, op, name, series=False):
57+
if name.startswith('__'):
58+
# For lookups in _libs.sparse we need non-dunder op name
59+
name = name[2:-2]
5760

5861
if series and is_integer_dtype(left) and is_integer_dtype(right):
5962
# series coerces to float64 if result should have NaN/inf
@@ -119,6 +122,10 @@ def _sparse_array_op(left, right, op, name, series=False):
119122

120123
def _wrap_result(name, data, sparse_index, fill_value, dtype=None):
121124
""" wrap op result to have correct dtype """
125+
if name.startswith('__'):
126+
# e.g. __eq__ --> eq
127+
name = name[2:-2]
128+
122129
if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
123130
dtype = np.bool
124131

pandas/core/sparse/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,6 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
551551
return self._constructor(index=new_index).__finalize__(self)
552552

553553
new_data = {}
554-
new_fill_value = None
555554
if fill_value is not None:
556555
# TODO: be a bit more intelligent here
557556
for col in new_columns:
@@ -568,6 +567,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
568567
new_data[col] = func(this[col], other[col])
569568

570569
# if the fill values are the same use them? or use a valid one
570+
new_fill_value = None
571571
other_fill_value = getattr(other, 'default_fill_value', np.nan)
572572
if self.default_fill_value == other_fill_value:
573573
new_fill_value = self.default_fill_value

pandas/core/sparse/series.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -811,10 +811,7 @@ def from_coo(cls, A, dense_index=False):
811811
return _coo_to_sparse_series(A, dense_index=dense_index)
812812

813813

814-
# overwrite series methods with unaccelerated versions
815-
ops.add_special_arithmetic_methods(SparseSeries, **ops.series_special_funcs)
814+
# overwrite series methods with unaccelerated Sparse-specific versions
816815
ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs)
817-
# overwrite basic arithmetic to use SparseSeries version
818-
# force methods to overwrite previous definitions.
819816
ops.add_special_arithmetic_methods(SparseSeries,
820817
**ops.sparse_series_special_funcs)

0 commit comments

Comments
 (0)