1
1
import numbers
2
- from typing import TYPE_CHECKING , Type
2
+ from typing import TYPE_CHECKING , Any , Tuple , Type
3
3
import warnings
4
4
5
5
import numpy as np
6
6
7
- from pandas ._libs import lib
7
+ from pandas ._libs import lib , missing as libmissing
8
8
from pandas .compat import set_function_name
9
9
10
10
from pandas .core .dtypes .base import ExtensionDtype
@@ -61,13 +61,13 @@ class BooleanDtype(ExtensionDtype):
61
61
@property
62
62
def na_value (self ) -> "Scalar" :
63
63
"""
64
- BooleanDtype uses :attr:`numpy.nan ` as the missing NA value.
64
+ BooleanDtype uses :attr:`pandas.NA ` as the missing NA value.
65
65
66
66
.. warning::
67
67
68
68
`na_value` may change in a future release.
69
69
"""
70
- return np . nan
70
+ return libmissing . NA
71
71
72
72
@property
73
73
def type (self ) -> Type :
@@ -223,7 +223,7 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):
223
223
224
224
>>> pd.array([True, False, None], dtype="boolean")
225
225
<BooleanArray>
226
- [True, False, NaN ]
226
+ [True, False, NA ]
227
227
Length: 3, dtype: boolean
228
228
"""
229
229
@@ -262,17 +262,17 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
262
262
values , mask = coerce_to_array (scalars , copy = copy )
263
263
return BooleanArray (values , mask )
264
264
265
+ def _values_for_factorize (self ) -> Tuple [np .ndarray , Any ]:
266
+ data = self ._data .astype ("int8" )
267
+ data [self ._mask ] = - 1
268
+ return data , - 1
269
+
265
270
@classmethod
266
271
def _from_factorized (cls , values , original : "BooleanArray" ):
267
272
return cls ._from_sequence (values , dtype = original .dtype )
268
273
269
274
def _formatter (self , boxed = False ):
270
- def fmt (x ):
271
- if isna (x ):
272
- return "NaN"
273
- return str (x )
274
-
275
- return fmt
275
+ return str
276
276
277
277
def __getitem__ (self , item ):
278
278
if is_integer (item ):
@@ -281,25 +281,29 @@ def __getitem__(self, item):
281
281
return self ._data [item ]
282
282
return type (self )(self ._data [item ], self ._mask [item ])
283
283
284
- def _coerce_to_ndarray (self , force_bool : bool = False ):
284
+ def _coerce_to_ndarray (self , dtype = None , na_value : "Scalar" = libmissing . NA ):
285
285
"""
286
286
Coerce to an ndarray of the given dtype (object dtype by default); a bool dtype raises if missing values are present.
287
287
288
288
Parameters
289
289
----------
290
- force_bool : bool, default False
291
- If True, return bool array or raise error if not possible (in
292
- presence of missing values)
290
+ dtype : dtype, default object
291
+ The numpy dtype to convert to
292
+ na_value : scalar, optional
293
+ Scalar missing value indicator to use in numpy array. Defaults
294
+ to the native missing value indicator of this array (pd.NA).
293
295
"""
294
- if force_bool :
296
+ if dtype is None :
297
+ dtype = object
298
+ if is_bool_dtype (dtype ):
295
299
if not self .isna ().any ():
296
300
return self ._data
297
301
else :
298
302
raise ValueError (
299
303
"cannot convert to bool numpy array in presence of missing values"
300
304
)
301
- data = self ._data .astype (object )
302
- data [self ._mask ] = self . _na_value
305
+ data = self ._data .astype (dtype )
306
+ data [self ._mask ] = na_value
303
307
return data
304
308
305
309
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
@@ -309,15 +313,8 @@ def __array__(self, dtype=None):
309
313
the array interface, return my values
310
314
We return an object array here to preserve our scalar values
311
315
"""
312
- if dtype is not None :
313
- if is_bool_dtype (dtype ):
314
- return self ._coerce_to_ndarray (force_bool = True )
315
- # TODO can optimize this to not go through object dtype for
316
- # numeric dtypes
317
- arr = self ._coerce_to_ndarray ()
318
- return arr .astype (dtype , copy = False )
319
316
# by default (no dtype specified), return an object array
320
- return self ._coerce_to_ndarray ()
317
+ return self ._coerce_to_ndarray (dtype = dtype )
321
318
322
319
def __arrow_array__ (self , type = None ):
323
320
"""
@@ -483,8 +480,17 @@ def astype(self, dtype, copy=True):
483
480
return IntegerArray (
484
481
self ._data .astype (dtype .numpy_dtype ), self ._mask .copy (), copy = False
485
482
)
483
+ # for integer, error if there are missing values
484
+ if is_integer_dtype (dtype ):
485
+ if self .isna ().any ():
486
+ raise ValueError ("cannot convert NA to integer" )
487
+ # for float dtype, ensure we use np.nan before casting (numpy cannot
488
+ # deal with pd.NA)
489
+ na_value = self ._na_value
490
+ if is_float_dtype (dtype ):
491
+ na_value = np .nan
486
492
# coerce
487
- data = self ._coerce_to_ndarray ()
493
+ data = self ._coerce_to_ndarray (na_value = na_value )
488
494
return astype_nansafe (data , dtype , copy = None )
489
495
490
496
def value_counts (self , dropna = True ):
@@ -594,8 +600,6 @@ def logical_method(self, other):
594
600
595
601
@classmethod
596
602
def _create_comparison_method (cls , op ):
597
- op_name = op .__name__
598
-
599
603
def cmp_method (self , other ):
600
604
601
605
if isinstance (other , (ABCDataFrame , ABCSeries , ABCIndexClass )):
@@ -617,21 +621,26 @@ def cmp_method(self, other):
617
621
if len (self ) != len (other ):
618
622
raise ValueError ("Lengths must match to compare" )
619
623
620
- # numpy will show a DeprecationWarning on invalid elementwise
621
- # comparisons, this will raise in the future
622
- with warnings .catch_warnings ():
623
- warnings .filterwarnings ("ignore" , "elementwise" , FutureWarning )
624
- with np .errstate (all = "ignore" ):
625
- result = op (self ._data , other )
626
-
627
- # nans propagate
628
- if mask is None :
629
- mask = self ._mask
624
+ if other is libmissing .NA :
625
+ # numpy does not handle pd.NA well as "other" scalar (it returns
626
+ # a scalar False instead of an array)
627
+ result = np .zeros_like (self ._data )
628
+ mask = np .ones_like (self ._data )
630
629
else :
631
- mask = self ._mask | mask
630
+ # numpy will show a DeprecationWarning on invalid elementwise
631
+ # comparisons, this will raise in the future
632
+ with warnings .catch_warnings ():
633
+ warnings .filterwarnings ("ignore" , "elementwise" , FutureWarning )
634
+ with np .errstate (all = "ignore" ):
635
+ result = op (self ._data , other )
636
+
637
+ # nans propagate
638
+ if mask is None :
639
+ mask = self ._mask .copy ()
640
+ else :
641
+ mask = self ._mask | mask
632
642
633
- result [mask ] = op_name == "ne"
634
- return BooleanArray (result , np .zeros (len (result ), dtype = bool ), copy = False )
643
+ return BooleanArray (result , mask , copy = False )
635
644
636
645
name = "__{name}__" .format (name = op .__name__ )
637
646
return set_function_name (cmp_method , name , cls )
@@ -643,7 +652,7 @@ def _reduce(self, name, skipna=True, **kwargs):
643
652
# coerce to a nan-aware float if needed
644
653
if mask .any ():
645
654
data = self ._data .astype ("float64" )
646
- data [mask ] = self . _na_value
655
+ data [mask ] = np . nan
647
656
648
657
op = getattr (nanops , "nan" + name )
649
658
result = op (data , axis = 0 , skipna = skipna , mask = mask , ** kwargs )
0 commit comments