Skip to content

Backport PR #45125 on branch 1.4.x (BUG: Operations with SparseArray return SA with wrong indices) #45275

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1704,13 +1704,14 @@ def _cmp_method(self, other, op) -> SparseArray:
op_name = op.__name__.strip("_")
return _sparse_array_op(self, other, op, op_name)
else:
# scalar
with np.errstate(all="ignore"):
fill_value = op(self.fill_value, other)
result = op(self.sp_values, other)
result = np.full(len(self), fill_value, dtype=np.bool_)
result[self.sp_index.indices] = op(self.sp_values, other)

return type(self)(
result,
sparse_index=self.sp_index,
fill_value=fill_value,
dtype=np.bool_,
)
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/arrays/sparse/test_arithmetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class TestSparseArrayArithmetics:
_klass = SparseArray

def _assert(self, a, b):
    """Check ``a`` against ``b`` with ``tm.assert_numpy_array_equal``."""
    # NOTE: tm.assert_sp_array_equal is the comparison that should be used
    # here; see GH #45126.
    tm.assert_numpy_array_equal(a, b)

def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op):
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ def test_scalar_with_index_infer_dtype(self, scalar, dtype):
assert arr.dtype == dtype
assert exp.dtype == dtype

# GH 23122
def test_getitem_bool_sparse_array(self):
# GH 23122
spar_bool = SparseArray([False, True] * 5, dtype=np.bool8, fill_value=True)
exp = SparseArray([np.nan, 2, np.nan, 5, 6])
tm.assert_sp_array_equal(self.arr[spar_bool], exp)
Expand All @@ -266,6 +266,13 @@ def test_getitem_bool_sparse_array(self):
exp = SparseArray([np.nan, 3, 5])
tm.assert_sp_array_equal(res, exp)

def test_getitem_bool_sparse_array_as_comparison(self):
    """Index a SparseArray with the boolean SparseArray from ``arr > 2``."""
    # GH 45110
    arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan)
    res = arr[arr > 2]
    exp = SparseArray([3.0, 4.0], fill_value=np.nan)
    tm.assert_sp_array_equal(res, exp)

def test_get_item(self):

assert np.isnan(self.arr[1])
Expand Down
60 changes: 39 additions & 21 deletions pandas/tests/extension/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ def data_for_grouping(request):
return SparseArray([1, 1, np.nan, np.nan, 2, 2, 1, 3], fill_value=request.param)


@pytest.fixture(params=[0, np.nan])
def data_for_compare(request):
    """Return a SparseArray for comparison tests, once per fill value (0, NaN)."""
    return SparseArray([0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0], fill_value=request.param)


class BaseSparseTests:
def _check_unsupported(self, data):
if data.dtype == SparseDtype(int, 0):
Expand Down Expand Up @@ -461,32 +466,45 @@ def _check_divmod_op(self, ser, op, other, exc=NotImplementedError):
super()._check_divmod_op(ser, op, other, exc=None)


class TestComparisonOps(BaseSparseTests, base.BaseComparisonOpsTests):
def _compare_other(self, s, data, comparison_op, other):
class TestComparisonOps(BaseSparseTests):
def _compare_other(self, data_for_compare: SparseArray, comparison_op, other):
op = comparison_op

# array
result = pd.Series(op(data, other))
# hard to test the fill value, since we don't know what expected
# is in general.
# Rely on tests in `tests/sparse` to validate that.
assert isinstance(result.dtype, SparseDtype)
assert result.dtype.subtype == np.dtype("bool")

with np.errstate(all="ignore"):
expected = pd.Series(
SparseArray(
op(np.asarray(data), np.asarray(other)),
fill_value=result.values.fill_value,
)
result = op(data_for_compare, other)
assert isinstance(result, SparseArray)
assert result.dtype.subtype == np.bool_

if isinstance(other, SparseArray):
fill_value = op(data_for_compare.fill_value, other.fill_value)
else:
fill_value = np.all(
op(np.asarray(data_for_compare.fill_value), np.asarray(other))
)

tm.assert_series_equal(result, expected)
expected = SparseArray(
op(data_for_compare.to_dense(), np.asarray(other)),
fill_value=fill_value,
dtype=np.bool_,
)
tm.assert_sp_array_equal(result, expected)

# series
ser = pd.Series(data)
result = op(ser, other)
tm.assert_series_equal(result, expected)
def test_scalar(self, data_for_compare: SparseArray, comparison_op):
    """Compare the fixture array against the scalars 0, 1, -1 and NaN."""
    # Same scalars, in the same order, as the original four explicit calls.
    for scalar in (0, 1, -1, np.nan):
        self._compare_other(data_for_compare, comparison_op, scalar)

@pytest.mark.xfail(reason="Wrong indices")
def test_array(self, data_for_compare: SparseArray, comparison_op):
    """Compare the fixture array against a dense ndarray."""
    # Ten evenly spaced values from -4 to 5.
    arr = np.linspace(-4, 5, 10)
    self._compare_other(data_for_compare, comparison_op, arr)

@pytest.mark.xfail(reason="Wrong indices")
def test_sparse_array(self, data_for_compare: SparseArray, comparison_op):
    """Compare the fixture array against SparseArrays derived from it."""
    # First operand: the fixture shifted by one.
    arr = data_for_compare + 1
    self._compare_other(data_for_compare, comparison_op, arr)
    # Second operand: the fixture doubled.
    arr = data_for_compare * 2
    self._compare_other(data_for_compare, comparison_op, arr)


class TestPrinting(BaseSparseTests, base.BasePrintingTests):
Expand Down