pandas-dev · jreback · Sep 21, 2021 · Sep 19, 2021 · Sep 19, 2021 · Sep 19, 2021
diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
@@ -180,4 +180,19 @@ def time_min_max(self, func, fill_value):
         getattr(self.sp_arr, func)()
 
 
+class Take:
+
+    params = ([np.array([0]), np.arange(100_000), np.full(100_000, -1)], [True, False])
+    param_names = ["indices", "allow_fill"]
+
+    def setup(self, indices, allow_fill):
+        N = 1_000_000
+        fill_value = 0.0
+        arr = make_array(N, 1e-5, fill_value, np.float64)
+        self.sp_arr = SparseArray(arr, fill_value=fill_value)
+
+    def time_take(self, indices, allow_fill):
+        self.sp_arr.take(indices, allow_fill=allow_fill)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -354,6 +354,7 @@ Performance improvements
 - Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:43370`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
 - :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
+- Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`?`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -953,10 +953,7 @@ def take(
         elif allow_fill:
             result = self._take_with_fill(indices, fill_value=fill_value)
         else:
-            # error: Incompatible types in assignment (expression has type
-            # "Union[ndarray, SparseArray]", variable has type "ndarray")
-            result = self._take_without_fill(indices)  # type: ignore[assignment]
-            dtype = self.dtype
+            return self._take_without_fill(indices)
 
         return type(self)(
             result, fill_value=self.fill_value, kind=self.kind, dtype=dtype
@@ -1027,9 +1024,8 @@ def _take_with_fill(self, indices, fill_value=None) -> np.ndarray:
 
         return taken
 
-    def _take_without_fill(self, indices) -> np.ndarray | SparseArray:
+    def _take_without_fill(self: SparseArrayT, indices) -> SparseArrayT:
         to_shift = indices < 0
-        indices = indices.copy()
 
         n = len(self)
 
@@ -1040,30 +1036,17 @@ def _take_without_fill(self, indices) -> np.ndarray | SparseArray:
                 raise IndexError("out of bounds value in 'indices'.")
 
         if to_shift.any():
+            indices = indices.copy()
             indices[to_shift] += n
 
-        if self.sp_index.npoints == 0:
-            # edge case in take...
-            # I think just return
-            out = np.full(
-                indices.shape,
-                self.fill_value,
-                dtype=np.result_type(type(self.fill_value)),
-            )
-            arr, sp_index, fill_value = make_sparse(out, fill_value=self.fill_value)
-            return type(self)(arr, sparse_index=sp_index, fill_value=fill_value)
-
         sp_indexer = self.sp_index.lookup_array(indices)
-        taken = self.sp_values.take(sp_indexer)
-        fillable = sp_indexer < 0
+        value_mask = sp_indexer != -1
+        new_sp_values = self.sp_values[sp_indexer[value_mask]]
 
-        if fillable.any():
-            # TODO: may need to coerce array to fill value
-            result_type = np.result_type(taken, type(self.fill_value))
-            taken = taken.astype(result_type)
-            taken[fillable] = self.fill_value
+        value_indices = np.flatnonzero(value_mask).astype(np.int32, copy=False)
 
-        return taken
+        new_sp_index = make_sparse_index(len(indices), value_indices, kind=self.kind)
+        return type(self)._simple_new(new_sp_values, new_sp_index, dtype=self.dtype)
 
     def searchsorted(
         self,

diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -382,15 +382,15 @@ def test_take_filling_fill_value(self):
         with pytest.raises(IndexError, match=msg):
             sparse.take(np.array([1, 5]), fill_value=True)
 
-    def test_take_filling_all_nan(self):
-        sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])
-        # XXX: did the default kind from take change?
+    @pytest.mark.parametrize("kind", ["block", "integer"])
+    def test_take_filling_all_nan(self, kind):
+        sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind)
         result = sparse.take(np.array([1, 0, -1]))
-        expected = SparseArray([np.nan, np.nan, np.nan], kind="block")
+        expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
         tm.assert_sp_array_equal(result, expected)
 
         result = sparse.take(np.array([1, 0, -1]), fill_value=True)
-        expected = SparseArray([np.nan, np.nan, np.nan], kind="block")
+        expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
         tm.assert_sp_array_equal(result, expected)
 
         msg = "out of bounds value in 'indices'"