2
2
3
3
import numpy as np
4
4
5
- from pandas ._libs import (
6
- lib ,
7
- missing as libmissing ,
8
- )
9
5
from pandas ._typing import DtypeObj
10
6
from pandas .util ._decorators import cache_readonly
11
7
12
8
from pandas .core .dtypes .base import register_extension_dtype
13
- from pandas .core .dtypes .common import (
14
- is_bool_dtype ,
15
- is_float_dtype ,
16
- is_integer_dtype ,
17
- is_object_dtype ,
18
- is_string_dtype ,
19
- )
20
9
21
10
from pandas .core .arrays .masked import BaseMaskedDtype
22
11
from pandas .core .arrays .numeric import (
@@ -35,6 +24,8 @@ class _IntegerDtype(NumericDtype):
35
24
The attributes name & type are set when these subclasses are created.
36
25
"""
37
26
27
+ _default_np_dtype = np .dtype (np .int64 )
28
+
38
29
def __repr__(self) -> str:
    """
    Return a constructor-style name for this dtype, e.g. ``UInt16Dtype()``.

    The name is assembled from an optional ``U`` prefix (used when
    ``self.is_unsigned_integer`` is truthy), the literal ``Int``, the
    value of ``8 * self.itemsize`` and the suffix ``Dtype()``.
    """
    sign = "U" if self.is_unsigned_integer else ""
    # itemsize is scaled by 8 for the number shown in the name
    # (presumably bytes -> bits; confirm against the dtype definition).
    # The pieces are joined without separators so the result reads as a
    # single identifier followed by "()".
    return f"{sign}Int{8 * self.itemsize}Dtype()"
@@ -94,49 +85,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
94
85
return FLOAT_STR_TO_DTYPE [str (np_dtype )]
95
86
return None
96
87
97
-
98
def safe_cast(values, dtype, copy: bool):
    """
    Cast ``values`` to ``dtype``, accepting only value-preserving conversions.

    The cast is first attempted under NumPy's "safe" casting rule. If
    NumPy rejects it, an unrestricted cast is performed and accepted only
    when every converted element compares equal to its original (for
    example, floats that all hold whole numbers cast to an integer dtype).

    Parameters
    ----------
    values : np.ndarray
        Array to convert.
    dtype : dtype-like
        Target dtype; anything accepted by ``ndarray.astype`` and ``np.dtype``.
    copy : bool
        Forwarded to ``ndarray.astype``.

    Returns
    -------
    np.ndarray
        The converted array.

    Raises
    ------
    TypeError
        If the unrestricted cast changes at least one value.
    """
    try:
        return values.astype(dtype, casting="safe", copy=copy)
    except TypeError as err:
        # NumPy refuses e.g. float -> int under "safe"; fall back to a plain
        # cast and verify element-wise that nothing was altered.
        casted = values.astype(dtype, copy=copy)
        if (casted == values).all():
            return casted

        raise TypeError(
            f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
        ) from err
114
-
115
-
116
- def coerce_to_array (
117
- values , dtype , mask = None , copy : bool = False
118
- ) -> tuple [np .ndarray , np .ndarray ]:
119
- """
120
- Coerce the input values array to numpy arrays with a mask.
121
-
122
- Parameters
123
- ----------
124
- values : 1D list-like
125
- dtype : integer dtype
126
- mask : bool 1D array, optional
127
- copy : bool, default False
128
- if True, copy the input
129
-
130
- Returns
131
- -------
132
- tuple of (values, mask)
133
- """
134
- # if values is integer numpy array, preserve its dtype
135
- if dtype is None and hasattr (values , "dtype" ):
136
- if is_integer_dtype (values .dtype ):
137
- dtype = values .dtype
138
-
139
- if dtype is not None :
88
+ @classmethod
89
+ def _standardize_dtype (cls , dtype ) -> _IntegerDtype :
140
90
if isinstance (dtype , str ) and (
141
91
dtype .startswith ("Int" ) or dtype .startswith ("UInt" )
142
92
):
@@ -149,64 +99,26 @@ def coerce_to_array(
149
99
dtype = INT_STR_TO_DTYPE [str (np .dtype (dtype ))]
150
100
except KeyError as err :
151
101
raise ValueError (f"invalid dtype specified { dtype } " ) from err
102
+ return dtype
103
+
104
+ @classmethod
105
+ def _safe_cast (cls , values : np .ndarray , dtype : np .dtype , copy : bool ) -> np .ndarray :
106
+ """
107
+ Safely cast the values to the given dtype.
108
+
109
+ "safe" in this context means the casting is lossless. e.g. if 'values'
110
+ has a floating dtype, each value must be an integer.
111
+ """
112
+ try :
113
+ return values .astype (dtype , casting = "safe" , copy = copy )
114
+ except TypeError as err :
115
+ casted = values .astype (dtype , copy = copy )
116
+ if (casted == values ).all ():
117
+ return casted
152
118
153
- if isinstance (values , IntegerArray ):
154
- values , mask = values ._data , values ._mask
155
- if dtype is not None :
156
- values = values .astype (dtype .numpy_dtype , copy = False )
157
-
158
- if copy :
159
- values = values .copy ()
160
- mask = mask .copy ()
161
- return values , mask
162
-
163
- values = np .array (values , copy = copy )
164
- inferred_type = None
165
- if is_object_dtype (values .dtype ) or is_string_dtype (values .dtype ):
166
- inferred_type = lib .infer_dtype (values , skipna = True )
167
- if inferred_type == "empty" :
168
- pass
169
- elif inferred_type == "boolean" :
170
- raise TypeError (f"{ values .dtype } cannot be converted to a FloatingDtype" )
171
-
172
- elif is_bool_dtype (values ) and is_integer_dtype (dtype ):
173
- values = np .array (values , dtype = int , copy = copy )
174
-
175
- elif not (is_integer_dtype (values ) or is_float_dtype (values )):
176
- raise TypeError (f"{ values .dtype } cannot be converted to an IntegerDtype" )
177
-
178
- if values .ndim != 1 :
179
- raise TypeError ("values must be a 1D list-like" )
180
-
181
- if mask is None :
182
- mask = libmissing .is_numeric_na (values )
183
- else :
184
- assert len (mask ) == len (values )
185
-
186
- if mask .ndim != 1 :
187
- raise TypeError ("mask must be a 1D list-like" )
188
-
189
- # infer dtype if needed
190
- if dtype is None :
191
- dtype = np .dtype ("int64" )
192
- else :
193
- dtype = dtype .type
194
-
195
- # if we are float, let's make sure that we can
196
- # safely cast
197
-
198
- # we copy as need to coerce here
199
- if mask .any ():
200
- values = values .copy ()
201
- values [mask ] = 1
202
- if inferred_type in ("string" , "unicode" ):
203
- # casts from str are always safe since they raise
204
- # a ValueError if the str cannot be parsed into an int
205
- values = values .astype (dtype , copy = copy )
206
- else :
207
- values = safe_cast (values , dtype , copy = False )
208
-
209
- return values , mask
119
+ raise TypeError (
120
+ f"cannot safely cast non-equivalent { values .dtype } to { np .dtype (dtype )} "
121
+ ) from err
210
122
211
123
212
124
class IntegerArray (NumericArray ):
@@ -277,6 +189,8 @@ class IntegerArray(NumericArray):
277
189
Length: 3, dtype: UInt16
278
190
"""
279
191
192
+ _dtype_cls = _IntegerDtype
193
+
280
194
# The value used to fill '_data' to avoid upcasting
281
195
_internal_fill_value = 1
282
196
# Fill values used for any/all
@@ -295,12 +209,6 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
295
209
)
296
210
super ().__init__ (values , mask , copy = copy )
297
211
298
- @classmethod
299
- def _coerce_to_array (
300
- cls , value , * , dtype : DtypeObj , copy : bool = False
301
- ) -> tuple [np .ndarray , np .ndarray ]:
302
- return coerce_to_array (value , dtype = dtype , copy = copy )
303
-
304
212
305
213
_dtype_docstring = """
306
214
An ExtensionDtype for {dtype} integer data.
0 commit comments