Skip to content

BUG: Sparse indexing with bool sparse may be incorrect #13985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,7 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan`
- Bug in ``SparseArray`` and ``SparseSeries`` not applying ufunc to ``fill_value`` (:issue:`13853`)
- Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`)
- Bug in single row slicing on multi-type ``SparseDataFrame``s, types were previously forced to float (:issue:`13917`)
- Bug in sparse indexing using a ``SparseArray`` with ``bool`` dtype that could return an incorrect result (:issue:`13985`)

.. _whatsnew_0190.deprecations:

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
is_list_like,
is_sequence,
is_scalar,
is_sparse,
_ensure_platform_int)
from pandas.types.missing import isnull, _infer_fill_value

Expand Down Expand Up @@ -1811,9 +1812,10 @@ def check_bool_indexer(ax, key):
mask = isnull(result._values)
if mask.any():
raise IndexingError('Unalignable boolean Series key provided')

result = result.astype(bool)._values

elif is_sparse(result):
result = result.to_dense()
result = np.asarray(result, dtype=bool)
else:
# is_bool_indexer has already checked for nulls in the case of an
# object array key, so no check needed here
Expand Down
6 changes: 5 additions & 1 deletion pandas/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.types.generic import ABCSparseArray, ABCSparseSeries
from pandas.types.common import (is_float, is_integer,
is_integer_dtype, _ensure_platform_int,
is_bool_dtype,
is_list_like,
is_scalar, is_dtype_equal)
from pandas.types.cast import (_possibly_convert_platform, _maybe_promote,
Expand Down Expand Up @@ -385,7 +386,10 @@ def __getitem__(self, key):
data_slice = self.values[key]
else:
if isinstance(key, SparseArray):
key = np.asarray(key)
if is_bool_dtype(key):
key = key.to_dense()
else:
key = np.asarray(key)

if hasattr(key, '__len__') and len(self) != len(key):
return self.take(key)
Expand Down
1 change: 1 addition & 0 deletions pandas/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ def take(self, indices, axis=0, convert=True, *args, **kwargs):
-------
taken : ndarray
"""

convert = nv.validate_take_with_convert(convert, args, kwargs)
new_values = SparseArray.take(self.values, indices)
new_index = self.index.take(indices)
Expand Down
61 changes: 61 additions & 0 deletions pandas/sparse/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ def test_getitem(self):
exp = orig[orig % 2 == 1].to_sparse()
tm.assert_sp_series_equal(result, exp)

# sparse array
result = sparse[pd.SparseArray(sparse % 2 == 1, dtype=bool)]
tm.assert_sp_series_equal(result, exp)

def test_getitem_slice(self):
orig = self.orig
sparse = self.sparse
Expand Down Expand Up @@ -68,6 +72,10 @@ def test_getitem_fill_value(self):
exp = orig[orig % 2 == 1].to_sparse(fill_value=0)
tm.assert_sp_series_equal(result, exp)

# sparse array
result = sparse[pd.SparseArray(sparse % 2 == 1, dtype=bool)]
tm.assert_sp_series_equal(result, exp)

def test_getitem_ellipsis(self):
# GH 9467
s = pd.SparseSeries([1, np.nan, 2, 0, np.nan])
Expand Down Expand Up @@ -116,6 +124,10 @@ def test_loc(self):
exp = orig.loc[orig % 2 == 1].to_sparse()
tm.assert_sp_series_equal(result, exp)

# sparse array
result = sparse.loc[pd.SparseArray(sparse % 2 == 1, dtype=bool)]
tm.assert_sp_series_equal(result, exp)

def test_loc_index(self):
orig = pd.Series([1, np.nan, np.nan, 3, np.nan], index=list('ABCDE'))
sparse = orig.to_sparse()
Expand All @@ -137,6 +149,10 @@ def test_loc_index(self):
exp = orig.loc[orig % 2 == 1].to_sparse()
tm.assert_sp_series_equal(result, exp)

# sparse array
result = sparse[pd.SparseArray(sparse % 2 == 1, dtype=bool)]
tm.assert_sp_series_equal(result, exp)

def test_loc_index_fill_value(self):
orig = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE'))
sparse = orig.to_sparse(fill_value=0)
Expand Down Expand Up @@ -368,6 +384,35 @@ def test_reindex_fill_value(self):
exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse(fill_value=0)
tm.assert_sp_series_equal(res, exp)

def tests_indexing_with_sparse(self):
# GH 13985

for kind in ['integer', 'block']:
for fill in [True, False, np.nan]:
arr = pd.SparseArray([1, 2, 3], kind=kind)
indexer = pd.SparseArray([True, False, True], fill_value=fill,
dtype=bool)

tm.assert_sp_array_equal(pd.SparseArray([1, 3], kind=kind),
arr[indexer])

s = pd.SparseSeries(arr, index=['a', 'b', 'c'],
dtype=np.float64)
exp = pd.SparseSeries([1, 3], index=['a', 'c'],
dtype=np.float64, kind=kind)
tm.assert_sp_series_equal(s[indexer], exp)
tm.assert_sp_series_equal(s.loc[indexer], exp)
tm.assert_sp_series_equal(s.iloc[indexer], exp)

indexer = pd.SparseSeries(indexer, index=['a', 'b', 'c'])
tm.assert_sp_series_equal(s[indexer], exp)
tm.assert_sp_series_equal(s.loc[indexer], exp)

msg = ("iLocation based boolean indexing cannot use an "
"indexable as a mask")
with tm.assertRaisesRegexp(ValueError, msg):
s.iloc[indexer]


class TestSparseSeriesMultiIndexing(TestSparseSeriesIndexing):

Expand Down Expand Up @@ -405,6 +450,10 @@ def test_getitem_multi(self):
exp = orig[orig % 2 == 1].to_sparse()
tm.assert_sp_series_equal(result, exp)

# sparse array
result = sparse[pd.SparseArray(sparse % 2 == 1, dtype=bool)]
tm.assert_sp_series_equal(result, exp)

def test_getitem_multi_tuple(self):
orig = self.orig
sparse = self.sparse
Expand Down Expand Up @@ -454,6 +503,10 @@ def test_loc(self):
exp = orig.loc[orig % 2 == 1].to_sparse()
tm.assert_sp_series_equal(result, exp)

# sparse array
result = sparse.loc[pd.SparseArray(sparse % 2 == 1, dtype=bool)]
tm.assert_sp_series_equal(result, exp)

def test_loc_multi_tuple(self):
orig = self.orig
sparse = self.sparse
Expand Down Expand Up @@ -578,6 +631,10 @@ def test_loc(self):
exp = orig.loc[orig.x % 2 == 1].to_sparse()
tm.assert_sp_frame_equal(result, exp)

# sparse array
result = sparse.loc[pd.SparseArray(sparse.x % 2 == 1, dtype=bool)]
tm.assert_sp_frame_equal(result, exp)

def test_loc_index(self):
orig = pd.DataFrame([[1, np.nan, np.nan],
[2, 3, np.nan],
Expand Down Expand Up @@ -627,6 +684,10 @@ def test_loc_index(self):
exp = orig.loc[orig.x % 2 == 1].to_sparse()
tm.assert_sp_frame_equal(result, exp)

# sparse array
result = sparse.loc[pd.SparseArray(sparse.x % 2 == 1, dtype=bool)]
tm.assert_sp_frame_equal(result, exp)

def test_loc_slice(self):
orig = pd.DataFrame([[1, np.nan, np.nan],
[2, 3, np.nan],
Expand Down