Skip to content

Commit fbc2ab6

Browse files
authored
REF: share IntegerArray/FloatingArray coerce_to_array (#45596)
1 parent 4f7c50d commit fbc2ab6

File tree

7 files changed

+162
-219
lines changed

7 files changed

+162
-219
lines changed

pandas/core/arrays/floating.py

+17 -95
Original file line number | Diff line number | Diff line change
@@ -2,20 +2,9 @@
22

33
import numpy as np
44

5-
from pandas._libs import (
6-
lib,
7-
missing as libmissing,
8-
)
95
from pandas._typing import DtypeObj
106
from pandas.util._decorators import cache_readonly
117

12-
from pandas.core.dtypes.common import (
13-
is_bool_dtype,
14-
is_float_dtype,
15-
is_integer_dtype,
16-
is_object_dtype,
17-
is_string_dtype,
18-
)
198
from pandas.core.dtypes.dtypes import register_extension_dtype
209

2110
from pandas.core.arrays.numeric import (
@@ -34,6 +23,8 @@ class FloatingDtype(NumericDtype):
3423
The attributes name & type are set when these subclasses are created.
3524
"""
3625

26+
_default_np_dtype = np.dtype(np.float64)
27+
3728
def __repr__(self) -> str:
3829
return f"{self.name}Dtype()"
3930

@@ -66,31 +57,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
6657
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
6758
return None
6859

69-
70-
def coerce_to_array(
71-
values, dtype=None, mask=None, copy: bool = False
72-
) -> tuple[np.ndarray, np.ndarray]:
73-
"""
74-
Coerce the input values array to numpy arrays with a mask.
75-
76-
Parameters
77-
----------
78-
values : 1D list-like
79-
dtype : float dtype
80-
mask : bool 1D array, optional
81-
copy : bool, default False
82-
if True, copy the input
83-
84-
Returns
85-
-------
86-
tuple of (values, mask)
87-
"""
88-
# if values is floating numpy array, preserve its dtype
89-
if dtype is None and hasattr(values, "dtype"):
90-
if is_float_dtype(values.dtype):
91-
dtype = values.dtype
92-
93-
if dtype is not None:
60+
@classmethod
61+
def _standardize_dtype(cls, dtype) -> FloatingDtype:
9462
if isinstance(dtype, str) and dtype.startswith("Float"):
9563
# Avoid DeprecationWarning from NumPy about np.dtype("Float64")
9664
# https://github.com/numpy/numpy/pull/7476
@@ -101,60 +69,18 @@ def coerce_to_array(
10169
dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))]
10270
except KeyError as err:
10371
raise ValueError(f"invalid dtype specified {dtype}") from err
72+
return dtype
10473

105-
if isinstance(values, FloatingArray):
106-
values, mask = values._data, values._mask
107-
if dtype is not None:
108-
values = values.astype(dtype.numpy_dtype, copy=False)
109-
110-
if copy:
111-
values = values.copy()
112-
mask = mask.copy()
113-
return values, mask
114-
115-
values = np.array(values, copy=copy)
116-
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
117-
inferred_type = lib.infer_dtype(values, skipna=True)
118-
if inferred_type == "empty":
119-
pass
120-
elif inferred_type == "boolean":
121-
raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
122-
123-
elif is_bool_dtype(values) and is_float_dtype(dtype):
124-
values = np.array(values, dtype=float, copy=copy)
125-
126-
elif not (is_integer_dtype(values) or is_float_dtype(values)):
127-
raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
128-
129-
if values.ndim != 1:
130-
raise TypeError("values must be a 1D list-like")
131-
132-
if mask is None:
133-
mask = libmissing.is_numeric_na(values)
134-
135-
else:
136-
assert len(mask) == len(values)
137-
138-
if not mask.ndim == 1:
139-
raise TypeError("mask must be a 1D list-like")
140-
141-
# infer dtype if needed
142-
if dtype is None:
143-
dtype = np.dtype("float64")
144-
else:
145-
dtype = dtype.type
146-
147-
# if we are float, let's make sure that we can
148-
# safely cast
149-
150-
# we copy as need to coerce here
151-
# TODO should this be a safe cast?
152-
if mask.any():
153-
values = values.copy()
154-
values[mask] = np.nan
155-
values = values.astype(dtype, copy=False) # , casting="safe")
74+
@classmethod
75+
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
76+
"""
77+
Safely cast the values to the given dtype.
15678
157-
return values, mask
79+
"safe" in this context means the casting is lossless.
80+
"""
81+
# This is really only here for compatibility with IntegerDtype
82+
# (the IntegerDtype version is the one that actually verifies the cast is lossless)
83+
return values.astype(dtype, copy=copy)
15884

15985

16086
class FloatingArray(NumericArray):
@@ -217,8 +143,10 @@ class FloatingArray(NumericArray):
217143
Length: 3, dtype: Float32
218144
"""
219145

146+
_dtype_cls = FloatingDtype
147+
220148
# The value used to fill '_data' to avoid upcasting
221-
_internal_fill_value = 0.0
149+
_internal_fill_value = np.nan
222150
# Fill values used for any/all
223151
_truthy_value = 1.0
224152
_falsey_value = 0.0
@@ -239,12 +167,6 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
239167

240168
super().__init__(values, mask, copy=copy)
241169

242-
@classmethod
243-
def _coerce_to_array(
244-
cls, value, *, dtype: DtypeObj, copy: bool = False
245-
) -> tuple[np.ndarray, np.ndarray]:
246-
return coerce_to_array(value, dtype=dtype, copy=copy)
247-
248170

249171
_dtype_docstring = """
250172
An ExtensionDtype for {dtype} data.

pandas/core/arrays/integer.py

+25 -117
Original file line number | Diff line number | Diff line change
@@ -2,21 +2,10 @@
22

33
import numpy as np
44

5-
from pandas._libs import (
6-
lib,
7-
missing as libmissing,
8-
)
95
from pandas._typing import DtypeObj
106
from pandas.util._decorators import cache_readonly
117

128
from pandas.core.dtypes.base import register_extension_dtype
13-
from pandas.core.dtypes.common import (
14-
is_bool_dtype,
15-
is_float_dtype,
16-
is_integer_dtype,
17-
is_object_dtype,
18-
is_string_dtype,
19-
)
209

2110
from pandas.core.arrays.masked import BaseMaskedDtype
2211
from pandas.core.arrays.numeric import (
@@ -35,6 +24,8 @@ class _IntegerDtype(NumericDtype):
3524
The attributes name & type are set when these subclasses are created.
3625
"""
3726

27+
_default_np_dtype = np.dtype(np.int64)
28+
3829
def __repr__(self) -> str:
3930
sign = "U" if self.is_unsigned_integer else ""
4031
return f"{sign}Int{8 * self.itemsize}Dtype()"
@@ -94,49 +85,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
9485
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
9586
return None
9687

97-
98-
def safe_cast(values, dtype, copy: bool):
99-
"""
100-
Safely cast the values to the dtype if they
101-
are equivalent, meaning floats must be equivalent to the
102-
ints.
103-
"""
104-
try:
105-
return values.astype(dtype, casting="safe", copy=copy)
106-
except TypeError as err:
107-
casted = values.astype(dtype, copy=copy)
108-
if (casted == values).all():
109-
return casted
110-
111-
raise TypeError(
112-
f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
113-
) from err
114-
115-
116-
def coerce_to_array(
117-
values, dtype, mask=None, copy: bool = False
118-
) -> tuple[np.ndarray, np.ndarray]:
119-
"""
120-
Coerce the input values array to numpy arrays with a mask.
121-
122-
Parameters
123-
----------
124-
values : 1D list-like
125-
dtype : integer dtype
126-
mask : bool 1D array, optional
127-
copy : bool, default False
128-
if True, copy the input
129-
130-
Returns
131-
-------
132-
tuple of (values, mask)
133-
"""
134-
# if values is integer numpy array, preserve its dtype
135-
if dtype is None and hasattr(values, "dtype"):
136-
if is_integer_dtype(values.dtype):
137-
dtype = values.dtype
138-
139-
if dtype is not None:
88+
@classmethod
89+
def _standardize_dtype(cls, dtype) -> _IntegerDtype:
14090
if isinstance(dtype, str) and (
14191
dtype.startswith("Int") or dtype.startswith("UInt")
14292
):
@@ -149,64 +99,26 @@ def coerce_to_array(
14999
dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))]
150100
except KeyError as err:
151101
raise ValueError(f"invalid dtype specified {dtype}") from err
102+
return dtype
103+
104+
@classmethod
105+
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
106+
"""
107+
Safely cast the values to the given dtype.
108+
109+
"safe" in this context means the casting is lossless. e.g. if 'values'
110+
has a floating dtype, each value must be an integer.
111+
"""
112+
try:
113+
return values.astype(dtype, casting="safe", copy=copy)
114+
except TypeError as err:
115+
casted = values.astype(dtype, copy=copy)
116+
if (casted == values).all():
117+
return casted
152118

153-
if isinstance(values, IntegerArray):
154-
values, mask = values._data, values._mask
155-
if dtype is not None:
156-
values = values.astype(dtype.numpy_dtype, copy=False)
157-
158-
if copy:
159-
values = values.copy()
160-
mask = mask.copy()
161-
return values, mask
162-
163-
values = np.array(values, copy=copy)
164-
inferred_type = None
165-
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
166-
inferred_type = lib.infer_dtype(values, skipna=True)
167-
if inferred_type == "empty":
168-
pass
169-
elif inferred_type == "boolean":
170-
raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
171-
172-
elif is_bool_dtype(values) and is_integer_dtype(dtype):
173-
values = np.array(values, dtype=int, copy=copy)
174-
175-
elif not (is_integer_dtype(values) or is_float_dtype(values)):
176-
raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
177-
178-
if values.ndim != 1:
179-
raise TypeError("values must be a 1D list-like")
180-
181-
if mask is None:
182-
mask = libmissing.is_numeric_na(values)
183-
else:
184-
assert len(mask) == len(values)
185-
186-
if mask.ndim != 1:
187-
raise TypeError("mask must be a 1D list-like")
188-
189-
# infer dtype if needed
190-
if dtype is None:
191-
dtype = np.dtype("int64")
192-
else:
193-
dtype = dtype.type
194-
195-
# if we are float, let's make sure that we can
196-
# safely cast
197-
198-
# we copy as need to coerce here
199-
if mask.any():
200-
values = values.copy()
201-
values[mask] = 1
202-
if inferred_type in ("string", "unicode"):
203-
# casts from str are always safe since they raise
204-
# a ValueError if the str cannot be parsed into an int
205-
values = values.astype(dtype, copy=copy)
206-
else:
207-
values = safe_cast(values, dtype, copy=False)
208-
209-
return values, mask
119+
raise TypeError(
120+
f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
121+
) from err
210122

211123

212124
class IntegerArray(NumericArray):
@@ -277,6 +189,8 @@ class IntegerArray(NumericArray):
277189
Length: 3, dtype: UInt16
278190
"""
279191

192+
_dtype_cls = _IntegerDtype
193+
280194
# The value used to fill '_data' to avoid upcasting
281195
_internal_fill_value = 1
282196
# Fill values used for any/all
@@ -295,12 +209,6 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
295209
)
296210
super().__init__(values, mask, copy=copy)
297211

298-
@classmethod
299-
def _coerce_to_array(
300-
cls, value, *, dtype: DtypeObj, copy: bool = False
301-
) -> tuple[np.ndarray, np.ndarray]:
302-
return coerce_to_array(value, dtype=dtype, copy=copy)
303-
304212

305213
_dtype_docstring = """
306214
An ExtensionDtype for {dtype} integer data.

0 commit comments

Comments
 (0)