From 5188e035d1ce8d442cd20134a7dd0073242c4731 Mon Sep 17 00:00:00 2001
From: Scott Gigante
Date: Sun, 17 May 2020 18:23:41 -0400
Subject: [PATCH] patch ExtensionBlock fill_value, closes #27781, closes #29563

---
 doc/source/whatsnew/v1.1.0.rst         |  1 +
 pandas/core/internals/blocks.py        |  5 ++++-
 pandas/tests/extension/base/getitem.py | 28 ++++++++++++++++++++++++++
 pandas/tests/extension/test_sparse.py  |  5 +++++
 4 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index a3499f857d158..14c780b38b895 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -857,6 +857,7 @@ Sparse
 - Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`)
 - Bug in :meth:`arrays.SparseArray.from_spmatrix` wrongly read scipy sparse matrix (:issue:`31991`)
 - Bug in :meth:`Series.sum` with ``SparseArray`` raises ``TypeError`` (:issue:`25777`)
+- Bug where :class:`DataFrame` containing :class:`SparseArray` filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`)
 
 ExtensionArray
 ^^^^^^^^^^^^^^
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index c052c6c9d7d1d..fee8937d01be7 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1627,7 +1627,10 @@ def _holder(self):
     @property
     def fill_value(self):
         # Used in reindex_indexer
-        return self.values.dtype.na_value
+        if is_sparse(self.values):
+            return self.values.dtype.fill_value
+        else:
+            return self.values.dtype.na_value
 
     @property
     def _can_hold_na(self):
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
index 251376798efc3..5d0ea69007e27 100644
--- a/pandas/tests/extension/base/getitem.py
+++ b/pandas/tests/extension/base/getitem.py
@@ -399,3 +399,31 @@ def test_item(self, data):
 
         with pytest.raises(ValueError, match=msg):
             s.item()
+
+    def test_boolean_mask_frame_fill_value(self, data):
+        # https://github.com/pandas-dev/pandas/issues/27781
+        df = pd.DataFrame({"A": data})
+
+        mask = np.random.choice([True, False], df.shape[0])
+        result = pd.isna(df.iloc[mask]["A"])
+        expected = pd.isna(df["A"].iloc[mask])
+        self.assert_series_equal(result, expected)
+
+        mask = pd.Series(mask, index=df.index)
+        result = pd.isna(df.loc[mask]["A"])
+        expected = pd.isna(df["A"].loc[mask])
+        self.assert_series_equal(result, expected)
+
+    def test_fancy_index_frame_fill_value(self, data):
+        # https://github.com/pandas-dev/pandas/issues/29563
+        df = pd.DataFrame({"A": data})
+
+        mask = np.random.choice(df.shape[0], df.shape[0])
+        result = pd.isna(df.iloc[mask]["A"])
+        expected = pd.isna(df["A"].iloc[mask])
+        self.assert_series_equal(result, expected)
+
+        mask = pd.Series(mask, index=df.index)
+        result = pd.isna(df.loc[mask]["A"])
+        expected = pd.isna(df["A"].loc[mask])
+        self.assert_series_equal(result, expected)
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index e59b3f0600867..f0a1ab7d019d5 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -41,6 +41,11 @@ def data_for_twos(request):
     return SparseArray(np.ones(100) * 2)
 
 
+@pytest.fixture(params=[0, np.nan])
+def data_zeros(request):
+    return SparseArray(np.zeros(100, dtype=int), fill_value=request.param)
+
+
 @pytest.fixture(params=[0, np.nan])
 def data_missing(request):
     """Length 2 array with [NA, Valid]"""