From 4bc01a16aa7bd65342132b994cb07eea903d4ed7 Mon Sep 17 00:00:00 2001 From: keitakurita Date: Sat, 29 Apr 2017 14:31:41 +0900 Subject: [PATCH 1/3] BUG: Made SparseDataFrame.fillna() fill all NaNs --- pandas/core/sparse/array.py | 14 ++++++-------- pandas/tests/sparse/test_frame.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 5c1cf8c773501..9025f248e26b6 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -595,14 +595,12 @@ def fillna(self, value, downcast=None): if issubclass(self.dtype.type, np.floating): value = float(value) - if self._null_fill_value: - return self._simple_new(self.sp_values, self.sp_index, - fill_value=value) - else: - new_values = self.sp_values.copy() - new_values[isnull(new_values)] = value - return self._simple_new(new_values, self.sp_index, - fill_value=self.fill_value) + new_values = self.sp_values.copy() + new_values[isnull(new_values)] = value + fill_value = value if isnull(self.fill_value) else self.fill_value + + return self._simple_new(new_values, self.sp_index, + fill_value=fill_value) def sum(self, axis=0, *args, **kwargs): """ diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 654d12b782f37..5d75616464fa1 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1252,6 +1252,7 @@ def test_from_scipy_correct_ordering(spmatrix): tm.skip_if_no_package('scipy') arr = np.arange(1, 5).reshape(2, 2) + try: spm = spmatrix(arr) assert spm.dtype == arr.dtype @@ -1267,6 +1268,33 @@ def test_from_scipy_correct_ordering(spmatrix): tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) +def test_from_scipy_object_fillna(spmatrix): + # GH 16112 + tm.skip_if_no_package('scipy', max_version='0.19.0') + + arr = np.eye(3) + arr[1:, 0] = np.nan + + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm).fillna(-1.0) + + # Returning frame should fill all nan values with -1.0 + expected = pd.SparseDataFrame({0: {0: 1.0, 1: np.nan, 2: np.nan}, + 1: {0: np.nan, 1: 1.0, 2: np.nan}, + 2: {0: np.nan, 1: np.nan, 2: 1.0}} + ).fillna(-1.0) + + tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) + + class TestSparseDataFrameArithmetic(object): def test_numeric_op_scalar(self): From 297423209cdc5b157f8e1ae2468ee547b51ae043 Mon Sep 17 00:00:00 2001 From: Kernc Date: Wed, 12 Jul 2017 15:14:09 +0200 Subject: [PATCH 2/3] fixup! BUG: Made SparseDataFrame.fillna() fill all NaNs --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/sparse/array.py | 5 ++--- pandas/tests/sparse/test_frame.py | 14 ++++++-------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 015fdf1f45f47..e628578de9894 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -178,6 +178,7 @@ Groupby/Resample/Rolling Sparse ^^^^^^ +- Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) Reshaping diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 9025f248e26b6..42fc5189eebd8 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -595,9 +595,8 @@ def fillna(self, value, downcast=None): if issubclass(self.dtype.type, np.floating): value = float(value) - new_values = self.sp_values.copy() - new_values[isnull(new_values)] = value - fill_value = value if isnull(self.fill_value) else self.fill_value + new_values = np.where(isnull(self.sp_values), value, self.sp_values) + fill_value = value if self._null_fill_value else self.fill_value return self._simple_new(new_values, self.sp_index, fill_value=fill_value) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 5d75616464fa1..619e646fbfff3 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1252,7 +1252,6 @@ def test_from_scipy_correct_ordering(spmatrix): tm.skip_if_no_package('scipy') arr = np.arange(1, 5).reshape(2, 2) - try: spm = spmatrix(arr) assert spm.dtype == arr.dtype @@ -1268,9 +1267,9 @@ def test_from_scipy_correct_ordering(spmatrix): tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) -def test_from_scipy_object_fillna(spmatrix): +def test_from_scipy_fillna(spmatrix): # GH 16112 - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm.skip_if_no_package('scipy') arr = np.eye(3) arr[1:, 0] = np.nan @@ -1287,12 +1286,11 @@ def test_from_scipy_object_fillna(spmatrix): sdf = pd.SparseDataFrame(spm).fillna(-1.0) # Returning frame should fill all nan values with -1.0 - expected = pd.SparseDataFrame({0: {0: 1.0, 1: np.nan, 2: np.nan}, - 1: {0: np.nan, 1: 1.0, 2: np.nan}, - 2: {0: np.nan, 1: np.nan, 2: 1.0}} - ).fillna(-1.0) + expected = pd.SparseDataFrame([[1, -1, -1], + [-1, 1, -1], + [-1, -1, 1.]]) - tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) + tm.assert_numpy_array_equal(sdf.values, expected.values) class TestSparseDataFrameArithmetic(object): From c1cd33ec4cd49d2eb0976d9595498de53c993738 Mon Sep 17 00:00:00 2001 From: Kernc Date: Thu, 13 Jul 2017 16:51:39 +0200 Subject: [PATCH 3/3] fixup! BUG: Made SparseDataFrame.fillna() fill all NaNs --- pandas/tests/sparse/test_frame.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 619e646fbfff3..2e7a8a591a0f9 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1286,11 +1286,20 @@ def test_from_scipy_fillna(spmatrix): sdf = pd.SparseDataFrame(spm).fillna(-1.0) # Returning frame should fill all nan values with -1.0 - expected = pd.SparseDataFrame([[1, -1, -1], - [-1, 1, -1], - [-1, -1, 1.]]) + expected = pd.SparseDataFrame({ + 0: pd.SparseSeries([1., -1, -1]), + 1: pd.SparseSeries([np.nan, 1, np.nan]), + 2: pd.SparseSeries([np.nan, np.nan, 1]), + }, default_fill_value=-1) + + # fill_value is expected to be what .fillna() above was called with + # We don't use -1 as initial fill_value in expected SparseSeries + # construction because this way we obtain "compressed" SparseArrays, + # avoiding having to construct them ourselves + for col in expected: + expected[col].fill_value = -1 - tm.assert_numpy_array_equal(sdf.values, expected.values) + tm.assert_sp_frame_equal(sdf, expected) class TestSparseDataFrameArithmetic(object):