From 2ea1793a9244b1cf1ee9d675b065ee56141fb5d3 Mon Sep 17 00:00:00 2001 From: kushagr96 Date: Thu, 25 Jul 2024 20:15:39 -0400 Subject: [PATCH 1/5] Adding overrides for default values --- pandas/core/arrays/sparse/accessor.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index b8245349a4e62..5d0aa645c1f81 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -313,7 +313,18 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame: indices = data.indices indptr = data.indptr array_data = data.data - dtype = SparseDtype(array_data.dtype) + + default_fill_value_override = None + + # Default values for float type is NaN. Hence, in order to create a + # Sparse matrix of type float, we need override this default value as 0.0. + if array_data.dtype.kind in "f": + default_fill_value_override = 0.0 + # Similarly, default value needs to be overridden for complex type arrays. 
+ elif array_data.dtype.kind in "c": + default_fill_value_override = 0.0 + 0.0j + + dtype = SparseDtype(array_data.dtype, fill_value=default_fill_value_override) arrays = [] for i in range(n_columns): sl = slice(indptr[i], indptr[i + 1]) From 0d36e3dc870deb65ecec146809fccdb719794ebd Mon Sep 17 00:00:00 2001 From: kushagr96 Date: Thu, 25 Jul 2024 20:26:05 -0400 Subject: [PATCH 2/5] Adding default value overrides to Sparse dtype --- pandas/core/arrays/sparse/accessor.py | 12 +----------- pandas/core/dtypes/dtypes.py | 8 ++++++++ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 5d0aa645c1f81..9d382d967efd9 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -314,17 +314,7 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame: indptr = data.indptr array_data = data.data - default_fill_value_override = None - - # Default values for float type is NaN. Hence, in order to create a - # Sparse matrix of type float, we need override this default value as 0.0. - if array_data.dtype.kind in "f": - default_fill_value_override = 0.0 - # Similarly, default value needs to be overridden for complex type arrays. - elif array_data.dtype.kind in "c": - default_fill_value_override = 0.0 + 0.0j - - dtype = SparseDtype(array_data.dtype, fill_value=default_fill_value_override) + dtype = SparseDtype(array_data.dtype) arrays = [] for i in range(n_columns): sl = slice(indptr[i], indptr[i + 1]) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3aeab96e03163..457123570c8f0 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1747,6 +1747,14 @@ def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None: if fill_value is None: fill_value = na_value_for_dtype(dtype) + # Default values for float type is NaN. 
Hence, in order to create a Sparse + # matrix of type float, we need to override this default value with 0.0. + if dtype.kind in "f": + fill_value = 0.0 + # Similarly, default value needs to be overridden for complex type arrays. + elif dtype.kind in "c": + fill_value = 0.0 + 0.0j + self._dtype = dtype self._fill_value = fill_value self._check_fill_value() From 61ad9bda90f2aae67dc8eff3e0cde53d09c3d324 Mon Sep 17 00:00:00 2001 From: kushagr96 Date: Thu, 25 Jul 2024 20:29:57 -0400 Subject: [PATCH 3/5] added issue number --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5d89613bd3d4f..63f3f3ed66f8d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -619,7 +619,7 @@ Reshaping Sparse ^^^^^^ - Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) -- Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`) +- Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard coded an invalid ``fill_value`` for certain subtypes. 
(:issue:`59063`, :issue:`59212`) ExtensionArray ^^^^^^^^^^^^^^ From 4a8d42d1a978faf0e967209b54a7e0067678daa3 Mon Sep 17 00:00:00 2001 From: kushagr96 Date: Thu, 25 Jul 2024 20:32:04 -0400 Subject: [PATCH 4/5] Deleting empty line --- pandas/core/arrays/sparse/accessor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 9d382d967efd9..b8245349a4e62 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -313,7 +313,6 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame: indices = data.indices indptr = data.indptr array_data = data.data - dtype = SparseDtype(array_data.dtype) arrays = [] for i in range(n_columns): From aa6c74ec267f971f9c71c49dbb7078c9fc18b3fc Mon Sep 17 00:00:00 2001 From: kushagr96 Date: Wed, 31 Jul 2024 23:16:29 -0400 Subject: [PATCH 5/5] updating default values --- pandas/core/dtypes/dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 457123570c8f0..a851e6afef50f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1682,8 +1682,8 @@ class SparseDtype(ExtensionDtype): =========== ========== dtype na_value =========== ========== - float ``np.nan`` - complex ``np.nan`` + float ``0.0`` + complex ``0.0 + 0.0j`` int ``0`` bool ``False`` datetime64 ``pd.NaT``