11
11
from pandas .core .dtypes .generic import ABCSeries , ABCIndexClass
12
12
from pandas .core .dtypes .common import (
13
13
is_integer , is_scalar , is_float ,
14
+ is_bool_dtype ,
14
15
is_float_dtype ,
15
16
is_integer_dtype ,
16
17
is_object_dtype ,
@@ -76,7 +77,7 @@ def construct_from_string(cls, string):
76
77
"'{}'" .format (cls , string ))
77
78
78
79
79
- def to_integer_array (values , dtype = None ):
80
+ def integer_array (values , dtype = None , copy = False ):
80
81
"""
81
82
Infer and return an integer array of the values.
82
83
@@ -85,6 +86,7 @@ def to_integer_array(values, dtype=None):
85
86
values : 1D list-like
86
87
dtype : dtype, optional
87
88
dtype to coerce
89
+ copy : boolean, default False
88
90
89
91
Returns
90
92
-------
@@ -94,7 +96,8 @@ def to_integer_array(values, dtype=None):
94
96
------
95
97
TypeError if incompatible types
96
98
"""
97
- return IntegerArray (values , dtype = dtype , copy = False )
99
+ values , mask = coerce_to_array (values , dtype = dtype , copy = copy )
100
+ return IntegerArray (values , mask )
98
101
99
102
100
103
def safe_cast (values , dtype , copy ):
@@ -133,6 +136,11 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
133
136
-------
134
137
tuple of (values, mask)
135
138
"""
139
+ # if values is an integer numpy array, preserve its dtype
140
+ if dtype is None and hasattr (values , 'dtype' ):
141
+ if is_integer_dtype (values .dtype ):
142
+ dtype = values .dtype
143
+
136
144
if dtype is not None :
137
145
if not issubclass (type (dtype ), _IntegerDtype ):
138
146
try :
@@ -174,10 +182,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
174
182
175
183
# infer dtype if needed
176
184
if dtype is None :
177
- if is_integer_dtype (values ):
178
- dtype = values .dtype
179
- else :
180
- dtype = np .dtype ('int64' )
185
+ dtype = np .dtype ('int64' )
181
186
else :
182
187
dtype = dtype .type
183
188
@@ -197,47 +202,62 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
197
202
198
203
class IntegerArray (ExtensionArray , ExtensionOpsMixin ):
199
204
"""
200
- We represent an IntegerArray with 2 numpy arrays
205
+ Array of integer (optionally missing) values.
206
+
207
+ We represent an IntegerArray with 2 numpy arrays:
208
+
201
209
- data: contains a numpy integer array of the appropriate dtype
202
- - mask: a boolean array holding a mask on the data, False is missing
210
+ - mask: a boolean array holding a mask on the data, True is missing
211
+
212
+ To construct an IntegerArray from generic array-like input, use
213
+ ``integer_array`` function instead.
214
+
215
+ Parameters
216
+ ----------
217
+ values : integer 1D numpy array
218
+ mask : boolean 1D numpy array
219
+ copy : bool, default False
220
+
221
+ Returns
222
+ -------
223
+ IntegerArray
224
+
203
225
"""
204
226
205
227
@cache_readonly
206
228
def dtype (self ):
207
229
return _dtypes [str (self ._data .dtype )]
208
230
209
- def __init__ (self , values , mask = None , dtype = None , copy = False ):
210
- """
211
- Parameters
212
- ----------
213
- values : 1D list-like / IntegerArray
214
- mask : 1D list-like, optional
215
- dtype : subclass of _IntegerDtype, optional
216
- copy : bool, default False
231
+ def __init__ (self , values , mask , copy = False ):
232
+ if not ( isinstance ( values , np . ndarray )
233
+ and is_integer_dtype ( values . dtype )):
234
+ raise TypeError ( "values should be integer numpy array. Use "
235
+ "the 'integer_array' function instead" )
236
+ if not ( isinstance ( mask , np . ndarray ) and is_bool_dtype ( mask . dtype )):
237
+ raise TypeError ( "mask should be boolean numpy array. Use "
238
+ "the 'integer_array' function instead" )
217
239
218
- Returns
219
- -------
220
- IntegerArray
221
- """
222
- self ._data , self . _mask = coerce_to_array (
223
- values , dtype = dtype , mask = mask , copy = copy )
240
+ if copy :
241
+ values = values . copy ()
242
+ mask = mask . copy ()
243
+
244
+ self ._data = values
245
+ self . _mask = mask
224
246
225
247
@classmethod
226
248
def _from_sequence (cls , scalars , dtype = None , copy = False ):
227
- return cls (scalars , dtype = dtype , copy = copy )
249
+ return integer_array (scalars , dtype = dtype , copy = copy )
228
250
229
251
@classmethod
230
252
def _from_factorized (cls , values , original ):
231
- return cls (values , dtype = original .dtype )
253
+ return integer_array (values , dtype = original .dtype )
232
254
233
255
def __getitem__ (self , item ):
234
256
if is_integer (item ):
235
257
if self ._mask [item ]:
236
258
return self .dtype .na_value
237
259
return self ._data [item ]
238
- return type (self )(self ._data [item ],
239
- mask = self ._mask [item ],
240
- dtype = self .dtype )
260
+ return type (self )(self ._data [item ], self ._mask [item ])
241
261
242
262
def _coerce_to_ndarray (self ):
243
263
"""
@@ -294,7 +314,7 @@ def take(self, indexer, allow_fill=False, fill_value=None):
294
314
result [fill_mask ] = fill_value
295
315
mask = mask ^ fill_mask
296
316
297
- return type (self )(result , mask = mask , dtype = self . dtype , copy = False )
317
+ return type (self )(result , mask , copy = False )
298
318
299
319
def copy (self , deep = False ):
300
320
data , mask = self ._data , self ._mask
@@ -304,7 +324,7 @@ def copy(self, deep=False):
304
324
else :
305
325
data = data .copy ()
306
326
mask = mask .copy ()
307
- return type (self )(data , mask , dtype = self . dtype , copy = False )
327
+ return type (self )(data , mask , copy = False )
308
328
309
329
def __setitem__ (self , key , value ):
310
330
_is_scalar = is_scalar (value )
@@ -356,7 +376,7 @@ def _na_value(self):
356
376
def _concat_same_type (cls , to_concat ):
357
377
data = np .concatenate ([x ._data for x in to_concat ])
358
378
mask = np .concatenate ([x ._mask for x in to_concat ])
359
- return cls (data , mask = mask , dtype = to_concat [ 0 ]. dtype )
379
+ return cls (data , mask )
360
380
361
381
def astype (self , dtype , copy = True ):
362
382
"""Cast to a NumPy array or IntegerArray with 'dtype'.
@@ -386,8 +406,7 @@ def astype(self, dtype, copy=True):
386
406
if isinstance (dtype , _IntegerDtype ):
387
407
result = self ._data .astype (dtype .numpy_dtype ,
388
408
casting = 'same_kind' , copy = False )
389
- return type (self )(result , mask = self ._mask ,
390
- dtype = dtype , copy = False )
409
+ return type (self )(result , mask = self ._mask , copy = False )
391
410
392
411
# coerce
393
412
data = self ._coerce_to_ndarray ()
@@ -523,7 +542,7 @@ def _maybe_mask_result(self, result, mask, other, op_name):
523
542
result [mask ] = np .nan
524
543
return result
525
544
526
- return type (self )(result , mask = mask , dtype = self . dtype , copy = False )
545
+ return type (self )(result , mask , copy = False )
527
546
528
547
@classmethod
529
548
def _create_arithmetic_method (cls , op ):
0 commit comments