diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e427116f8225e..dc76135a99dc0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -867,6 +867,7 @@ Sparse - Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`) - Bug in :meth:`arrays.SparseArray.from_spmatrix` wrongly read scipy sparse matrix (:issue:`31991`) - Bug in :meth:`Series.sum` with ``SparseArray`` raises ``TypeError`` (:issue:`25777`) +- Bug where a :class:`DataFrame` containing a :class:`SparseArray` was filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`) - The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. Previously it used ``fill_value``'s string representation (:issue:`34352`) ExtensionArray diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d880bd81bd947..e70c8f9d5f09a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1634,7 +1634,10 @@ def _holder(self): @property def fill_value(self): # Used in reindex_indexer - return self.values.dtype.na_value + if is_sparse(self.values): + return self.values.dtype.fill_value + else: + return self.values.dtype.na_value @property def _can_hold_na(self): diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 251376798efc3..5d0ea69007e27 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -399,3 +399,31 @@ def test_item(self, data): with pytest.raises(ValueError, match=msg): s.item() + + def test_boolean_mask_frame_fill_value(self, data): + # https://github.com/pandas-dev/pandas/issues/27781 + df = pd.DataFrame({"A": data}) + + mask = np.random.choice([True, False], df.shape[0]) + result = pd.isna(df.iloc[mask]["A"]) + expected = pd.isna(df["A"].iloc[mask]) + 
self.assert_series_equal(result, expected) + + mask = pd.Series(mask, index=df.index) + result = pd.isna(df.loc[mask]["A"]) + expected = pd.isna(df["A"].loc[mask]) + self.assert_series_equal(result, expected) + + def test_fancy_index_frame_fill_value(self, data): + # https://github.com/pandas-dev/pandas/issues/29563 + df = pd.DataFrame({"A": data}) + + mask = np.random.choice(df.shape[0], df.shape[0]) + result = pd.isna(df.iloc[mask]["A"]) + expected = pd.isna(df["A"].iloc[mask]) + self.assert_series_equal(result, expected) + + mask = pd.Series(mask, index=df.index) + result = pd.isna(df.loc[mask]["A"]) + expected = pd.isna(df["A"].loc[mask]) + self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index e59b3f0600867..f0a1ab7d019d5 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -41,6 +41,11 @@ def data_for_twos(request): return SparseArray(np.ones(100) * 2) +@pytest.fixture(params=[0, np.nan]) +def data_zeros(request): + return SparseArray(np.zeros(100, dtype=int), fill_value=request.param) + + @pytest.fixture(params=[0, np.nan]) def data_missing(request): """Length 2 array with [NA, Valid]"""