Skip to content

Commit 5d87407

Browse files
jbrockmendel authored and yehoshuadimarsky committed
REF: share BooleanArray.astype+NumericArray.astype (pandas-dev#45420)
1 parent b112fa5 commit 5d87407

File tree

5 files changed

+33
-154
lines changed

5 files changed

+33
-154
lines changed

pandas/core/arrays/boolean.py

+2 -77
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,7 @@
11
from __future__ import annotations
22

33
import numbers
4-
from typing import (
5-
TYPE_CHECKING,
6-
overload,
7-
)
4+
from typing import TYPE_CHECKING
85

96
import numpy as np
107

@@ -13,30 +10,19 @@
1310
missing as libmissing,
1411
)
1512
from pandas._typing import (
16-
ArrayLike,
17-
AstypeArg,
1813
Dtype,
1914
DtypeObj,
20-
npt,
2115
type_t,
2216
)
2317

2418
from pandas.core.dtypes.common import (
25-
is_bool_dtype,
26-
is_float_dtype,
27-
is_integer_dtype,
2819
is_list_like,
2920
is_numeric_dtype,
30-
pandas_dtype,
31-
)
32-
from pandas.core.dtypes.dtypes import (
33-
ExtensionDtype,
34-
register_extension_dtype,
3521
)
22+
from pandas.core.dtypes.dtypes import register_extension_dtype
3623
from pandas.core.dtypes.missing import isna
3724

3825
from pandas.core import ops
39-
from pandas.core.arrays import ExtensionArray
4026
from pandas.core.arrays.masked import (
4127
BaseMaskedArray,
4228
BaseMaskedDtype,
@@ -360,67 +346,6 @@ def _coerce_to_array(
360346
assert dtype == "boolean"
361347
return coerce_to_array(value, copy=copy)
362348

363-
@overload
364-
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
365-
...
366-
367-
@overload
368-
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
369-
...
370-
371-
@overload
372-
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
373-
...
374-
375-
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
376-
377-
"""
378-
Cast to a NumPy array or ExtensionArray with 'dtype'.
379-
380-
Parameters
381-
----------
382-
dtype : str or dtype
383-
Typecode or data-type to which the array is cast.
384-
copy : bool, default True
385-
Whether to copy the data, even if not necessary. If False,
386-
a copy is made only if the old dtype does not match the
387-
new dtype.
388-
389-
Returns
390-
-------
391-
ndarray or ExtensionArray
392-
NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.
393-
394-
Raises
395-
------
396-
TypeError
397-
if incompatible type with an BooleanDtype, equivalent of same_kind
398-
casting
399-
"""
400-
dtype = pandas_dtype(dtype)
401-
402-
if isinstance(dtype, ExtensionDtype):
403-
return super().astype(dtype, copy)
404-
405-
if is_bool_dtype(dtype):
406-
# astype_nansafe converts np.nan to True
407-
if self._hasna:
408-
raise ValueError("cannot convert float NaN to bool")
409-
else:
410-
return self._data.astype(dtype, copy=copy)
411-
412-
# for integer, error if there are missing values
413-
if is_integer_dtype(dtype) and self._hasna:
414-
raise ValueError("cannot convert NA to integer")
415-
416-
# for float dtype, ensure we use np.nan before casting (numpy cannot
417-
# deal with pd.NA)
418-
na_value = self._na_value
419-
if is_float_dtype(dtype):
420-
na_value = np.nan
421-
# coerce
422-
return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
423-
424349
def _logical_method(self, other, op):
425350

426351
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}

pandas/core/arrays/masked.py

+26 -1
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,12 @@
3737
)
3838
from pandas.util._validators import validate_fillna_kwargs
3939

40+
from pandas.core.dtypes.astype import astype_nansafe
4041
from pandas.core.dtypes.base import ExtensionDtype
4142
from pandas.core.dtypes.common import (
4243
is_bool,
4344
is_bool_dtype,
45+
is_datetime64_dtype,
4446
is_dtype_equal,
4547
is_float,
4648
is_float_dtype,
@@ -450,7 +452,30 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
450452
eacls = dtype.construct_array_type()
451453
return eacls._from_sequence(self, dtype=dtype, copy=copy)
452454

453-
raise NotImplementedError("subclass must implement astype to np.dtype")
455+
na_value: float | np.datetime64 | lib.NoDefault
456+
457+
# coerce
458+
if is_float_dtype(dtype):
459+
# In astype, we consider dtype=float to also mean na_value=np.nan
460+
na_value = np.nan
461+
elif is_datetime64_dtype(dtype):
462+
na_value = np.datetime64("NaT")
463+
else:
464+
na_value = lib.no_default
465+
466+
# to_numpy will also raise, but we get somewhat nicer exception messages here
467+
if is_integer_dtype(dtype) and self._hasna:
468+
raise ValueError("cannot convert NA to integer")
469+
if is_bool_dtype(dtype) and self._hasna:
470+
# careful: astype_nansafe converts np.nan to True
471+
raise ValueError("cannot convert float NaN to bool")
472+
473+
data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
474+
if self.dtype.kind == "f":
475+
# TODO: make this consistent between IntegerArray/FloatingArray,
476+
# see test_astype_str
477+
return astype_nansafe(data, dtype, copy=False)
478+
return data
454479

455480
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
456481

pandas/core/arrays/numeric.py

+1 -72
Original file line number | Diff line number | Diff line change
@@ -5,35 +5,25 @@
55
from typing import (
66
TYPE_CHECKING,
77
TypeVar,
8-
overload,
98
)
109

1110
import numpy as np
1211

1312
from pandas._libs import (
1413
Timedelta,
15-
lib,
1614
missing as libmissing,
1715
)
18-
from pandas._typing import (
19-
ArrayLike,
20-
AstypeArg,
21-
Dtype,
22-
npt,
23-
)
16+
from pandas._typing import Dtype
2417
from pandas.compat.numpy import function as nv
2518

26-
from pandas.core.dtypes.astype import astype_nansafe
2719
from pandas.core.dtypes.common import (
28-
is_datetime64_dtype,
2920
is_float,
3021
is_float_dtype,
3122
is_integer,
3223
is_integer_dtype,
3324
is_list_like,
3425
pandas_dtype,
3526
)
36-
from pandas.core.dtypes.dtypes import ExtensionDtype
3727

3828
from pandas.core.arrays.masked import (
3929
BaseMaskedArray,
@@ -43,7 +33,6 @@
4333
if TYPE_CHECKING:
4434
import pyarrow
4535

46-
from pandas.core.arrays import ExtensionArray
4736

4837
T = TypeVar("T", bound="NumericArray")
4938

@@ -112,66 +101,6 @@ def _from_sequence_of_strings(
112101
scalars = to_numeric(strings, errors="raise")
113102
return cls._from_sequence(scalars, dtype=dtype, copy=copy)
114103

115-
@overload
116-
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
117-
...
118-
119-
@overload
120-
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
121-
...
122-
123-
@overload
124-
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
125-
...
126-
127-
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
128-
"""
129-
Cast to a NumPy array or ExtensionArray with 'dtype'.
130-
131-
Parameters
132-
----------
133-
dtype : str or dtype
134-
Typecode or data-type to which the array is cast.
135-
copy : bool, default True
136-
Whether to copy the data, even if not necessary. If False,
137-
a copy is made only if the old dtype does not match the
138-
new dtype.
139-
140-
Returns
141-
-------
142-
ndarray or ExtensionArray
143-
NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with
144-
'dtype' for its dtype.
145-
146-
Raises
147-
------
148-
TypeError
149-
if incompatible type with our dtype, equivalent of same_kind
150-
casting
151-
"""
152-
dtype = pandas_dtype(dtype)
153-
154-
if isinstance(dtype, ExtensionDtype):
155-
return super().astype(dtype, copy=copy)
156-
157-
na_value: float | np.datetime64 | lib.NoDefault
158-
159-
# coerce
160-
if is_float_dtype(dtype):
161-
# In astype, we consider dtype=float to also mean na_value=np.nan
162-
na_value = np.nan
163-
elif is_datetime64_dtype(dtype):
164-
na_value = np.datetime64("NaT")
165-
else:
166-
na_value = lib.no_default
167-
168-
data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
169-
if self.dtype.kind == "f":
170-
# TODO: make this consistent between IntegerArray/FloatingArray,
171-
# see test_astype_str
172-
return astype_nansafe(data, dtype, copy=False)
173-
return data
174-
175104
def _arith_method(self, other, op):
176105
op_name = op.__name__
177106
omask = None

pandas/tests/arrays/floating/test_astype.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,10 @@ def test_astype():
99
# with missing values
1010
arr = pd.array([0.1, 0.2, None], dtype="Float64")
1111

12-
with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype NumPy"):
12+
with pytest.raises(ValueError, match="cannot convert NA to integer"):
1313
arr.astype("int64")
1414

15-
with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype NumPy"):
15+
with pytest.raises(ValueError, match="cannot convert float NaN to bool"):
1616
arr.astype("bool")
1717

1818
result = arr.astype("float64")

pandas/tests/arrays/integer/test_dtypes.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ def test_preserve_dtypes(op):
5454
def test_astype_nansafe():
5555
# see gh-22343
5656
arr = pd.array([np.nan, 1, 2], dtype="Int8")
57-
msg = "cannot convert to 'uint32'-dtype NumPy array with missing values."
57+
msg = "cannot convert NA to integer"
5858

5959
with pytest.raises(ValueError, match=msg):
6060
arr.astype("uint32")
@@ -136,7 +136,7 @@ def test_astype(all_data):
136136

137137
# coerce to same numpy_dtype - mixed
138138
s = pd.Series(mixed)
139-
msg = r"cannot convert to .*-dtype NumPy array with missing values.*"
139+
msg = "cannot convert NA to integer"
140140
with pytest.raises(ValueError, match=msg):
141141
s.astype(all_data.dtype.numpy_dtype)
142142

0 commit comments

Comments
 (0)