-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PERF: sparse take #43654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PERF: sparse take #43654
Changes from 5 commits
c54b60a
3a6c3e4
daadddc
4de1fe2
9ceb4fa
58efeb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -953,10 +953,7 @@ def take( | |
elif allow_fill: | ||
result = self._take_with_fill(indices, fill_value=fill_value) | ||
else: | ||
# error: Incompatible types in assignment (expression has type | ||
# "Union[ndarray, SparseArray]", variable has type "ndarray") | ||
result = self._take_without_fill(indices) # type: ignore[assignment] | ||
dtype = self.dtype | ||
return self._take_without_fill(indices) | ||
|
||
return type(self)( | ||
result, fill_value=self.fill_value, kind=self.kind, dtype=dtype | ||
|
@@ -1027,9 +1024,8 @@ def _take_with_fill(self, indices, fill_value=None) -> np.ndarray: | |
|
||
return taken | ||
|
||
def _take_without_fill(self, indices) -> np.ndarray | SparseArray: | ||
def _take_without_fill(self: SparseArrayT, indices) -> SparseArrayT: | ||
to_shift = indices < 0 | ||
indices = indices.copy() | ||
|
||
n = len(self) | ||
|
||
|
@@ -1040,30 +1036,17 @@ def _take_without_fill(self, indices) -> np.ndarray | SparseArray: | |
raise IndexError("out of bounds value in 'indices'.") | ||
|
||
if to_shift.any(): | ||
indices = indices.copy() | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
indices[to_shift] += n | ||
|
||
if self.sp_index.npoints == 0: | ||
# edge case in take... | ||
# I think just return | ||
out = np.full( | ||
indices.shape, | ||
self.fill_value, | ||
dtype=np.result_type(type(self.fill_value)), | ||
) | ||
arr, sp_index, fill_value = make_sparse(out, fill_value=self.fill_value) | ||
return type(self)(arr, sparse_index=sp_index, fill_value=fill_value) | ||
|
||
sp_indexer = self.sp_index.lookup_array(indices) | ||
taken = self.sp_values.take(sp_indexer) | ||
fillable = sp_indexer < 0 | ||
value_mask = sp_indexer != -1 | ||
new_sp_values = self.sp_values[sp_indexer[value_mask]] | ||
|
||
if fillable.any(): | ||
# TODO: may need to coerce array to fill value | ||
result_type = np.result_type(taken, type(self.fill_value)) | ||
taken = taken.astype(result_type) | ||
taken[fillable] = self.fill_value | ||
value_indices = np.flatnonzero(value_mask).astype(np.int32, copy=False) | ||
|
||
return taken | ||
new_sp_index = make_sparse_index(len(indices), value_indices, kind=self.kind) | ||
return type(self)._simple_new(new_sp_values, new_sp_index, dtype=self.dtype) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Main reasoning for the simplifications here keeping same behavior is that |
||
|
||
def searchsorted( | ||
self, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -382,15 +382,15 @@ def test_take_filling_fill_value(self): | |
with pytest.raises(IndexError, match=msg): | ||
sparse.take(np.array([1, 5]), fill_value=True) | ||
|
||
def test_take_filling_all_nan(self): | ||
sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan]) | ||
# XXX: did the default kind from take change? | ||
@pytest.mark.parametrize("kind", ["block", "integer"]) | ||
def test_take_filling_all_nan(self, kind): | ||
sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind) | ||
result = sparse.take(np.array([1, 0, -1])) | ||
expected = SparseArray([np.nan, np.nan, np.nan], kind="block") | ||
expected = SparseArray([np.nan, np.nan, np.nan], kind=kind) | ||
tm.assert_sp_array_equal(result, expected) | ||
|
||
result = sparse.take(np.array([1, 0, -1]), fill_value=True) | ||
expected = SparseArray([np.nan, np.nan, np.nan], kind="block") | ||
expected = SparseArray([np.nan, np.nan, np.nan], kind=kind) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In all other |
||
tm.assert_sp_array_equal(result, expected) | ||
|
||
msg = "out of bounds value in 'indices'" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fill in ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!