Skip to content

Commit 56931c4

Browse files
authored
BUG: SparseArray[dt64_weird_fill_value].astype(int64) (pandas-dev#50087)
* BUG: SparseArray[dt64_weird_fill_value].astype(int64) * GH ref
1 parent 8d1be80 commit 56931c4

File tree

4 files changed

+39
-23
lines changed

4 files changed

+39
-23
lines changed

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,7 @@ Reshaping
826826

827827
Sparse
828828
^^^^^^
829-
- Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`)
829+
- Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`, :issue:`50087`)
830830
- Bug in :meth:`Series.astype` when converting from ``datetime64[ns]`` to ``Sparse[datetime64[ns]]`` incorrectly raising (:issue:`50082`)
831831
-
832832

pandas/core/arrays/sparse/array.py

+17-21
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,7 @@
4949
validate_insert_loc,
5050
)
5151

52-
from pandas.core.dtypes.astype import (
53-
astype_array,
54-
astype_nansafe,
55-
)
52+
from pandas.core.dtypes.astype import astype_array
5653
from pandas.core.dtypes.cast import (
5754
construct_1d_arraylike_from_scalar,
5855
find_common_type,
@@ -445,7 +442,7 @@ def __init__(
445442
# NumPy may raise a ValueError on data like [1, []]
446443
# we retry with object dtype here.
447444
if dtype is None:
448-
dtype = object
445+
dtype = np.dtype(object)
449446
data = np.atleast_1d(np.asarray(data, dtype=dtype))
450447
else:
451448
raise
@@ -464,10 +461,7 @@ def __init__(
464461
if isinstance(data, type(self)) and sparse_index is None:
465462
sparse_index = data._sparse_index
466463
# error: Argument "dtype" to "asarray" has incompatible type
467-
# "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected
468-
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
469-
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
470-
# Any]]]"
464+
# "Union[ExtensionDtype, dtype[Any], None]"; expected "None"
471465
sparse_values = np.asarray(
472466
data.sp_values, dtype=dtype # type: ignore[arg-type]
473467
)
@@ -487,21 +481,18 @@ def __init__(
487481
if fill_value is NaT:
488482
fill_value = np.datetime64("NaT", "ns")
489483
data = np.asarray(data)
490-
sparse_values, sparse_index, fill_value = make_sparse(
491-
# error: Argument "dtype" to "make_sparse" has incompatible type
492-
# "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected
493-
# "Union[str, dtype[Any], None]"
484+
sparse_values, sparse_index, fill_value = _make_sparse(
485+
# error: Argument "dtype" to "_make_sparse" has incompatible type
486+
# "Union[ExtensionDtype, dtype[Any], None]"; expected
487+
# "Optional[dtype[Any]]"
494488
data,
495489
kind=kind,
496490
fill_value=fill_value,
497491
dtype=dtype, # type: ignore[arg-type]
498492
)
499493
else:
500494
# error: Argument "dtype" to "asarray" has incompatible type
501-
# "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected
502-
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
503-
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
504-
# Any]]]"
495+
# "Union[ExtensionDtype, dtype[Any], None]"; expected "None"
505496
sparse_values = np.asarray(data, dtype=dtype) # type: ignore[arg-type]
506497
if len(sparse_values) != sparse_index.npoints:
507498
raise AssertionError(
@@ -1288,7 +1279,9 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
12881279
dtype = self.dtype.update_dtype(dtype)
12891280
subtype = pandas_dtype(dtype._subtype_with_str)
12901281
subtype = cast(np.dtype, subtype) # ensured by update_dtype
1291-
sp_values = astype_nansafe(self.sp_values, subtype, copy=copy)
1282+
values = ensure_wrapped_if_datetimelike(self.sp_values)
1283+
sp_values = astype_array(values, subtype, copy=copy)
1284+
sp_values = np.asarray(sp_values)
12921285

12931286
return self._simple_new(sp_values, self.sp_index, dtype)
12941287

@@ -1828,11 +1821,11 @@ def _formatter(self, boxed: bool = False):
18281821
return None
18291822

18301823

1831-
def make_sparse(
1824+
def _make_sparse(
18321825
arr: np.ndarray,
18331826
kind: SparseIndexKind = "block",
18341827
fill_value=None,
1835-
dtype: NpDtype | None = None,
1828+
dtype: np.dtype | None = None,
18361829
):
18371830
"""
18381831
Convert ndarray to sparse format
@@ -1882,7 +1875,10 @@ def make_sparse(
18821875
index = make_sparse_index(length, indices, kind)
18831876
sparsified_values = arr[mask]
18841877
if dtype is not None:
1885-
sparsified_values = astype_nansafe(sparsified_values, dtype=pandas_dtype(dtype))
1878+
sparsified_values = ensure_wrapped_if_datetimelike(sparsified_values)
1879+
sparsified_values = astype_array(sparsified_values, dtype=dtype)
1880+
sparsified_values = np.asarray(sparsified_values)
1881+
18861882
# TODO: copy
18871883
return sparsified_values, index, fill_value
18881884

pandas/core/arrays/sparse/dtype.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,15 @@ def __eq__(self, other: Any) -> bool:
127127
or isinstance(other.fill_value, type(self.fill_value))
128128
)
129129
else:
130-
fill_value = self.fill_value == other.fill_value
130+
with warnings.catch_warnings():
131+
# Ignore spurious numpy warning
132+
warnings.filterwarnings(
133+
"ignore",
134+
"elementwise comparison failed",
135+
category=DeprecationWarning,
136+
)
137+
138+
fill_value = self.fill_value == other.fill_value
131139

132140
return subtype and fill_value
133141
return False

pandas/tests/arrays/sparse/test_astype.py

+12
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,15 @@ def test_astype_dt64_to_int64(self):
119119
result = arr.astype("int64")
120120
expected = values.astype("int64")
121121
tm.assert_numpy_array_equal(result, expected)
122+
123+
# we should also be able to cast to equivalent Sparse[int64]
124+
dtype_int64 = SparseDtype("int64", np.iinfo(np.int64).min)
125+
result2 = arr.astype(dtype_int64)
126+
tm.assert_numpy_array_equal(result2.to_numpy(), expected)
127+
128+
# GH#50087 we should match the non-sparse behavior regardless of
129+
# whether we have a fill_value other than NaT
130+
dtype = SparseDtype("datetime64[ns]", values[1])
131+
arr3 = SparseArray(values, dtype=dtype)
132+
result3 = arr3.astype("int64")
133+
tm.assert_numpy_array_equal(result3, expected)

0 commit comments

Comments
 (0)