53
53
54
54
from pandas .core import roperator
55
55
from pandas .core .arraylike import OpsMixin
56
- from pandas .core .arrays .base import ExtensionArray
56
+ from pandas .core .arrays .base import (
57
+ ExtensionArray ,
58
+ ExtensionArraySupportsAnyAll ,
59
+ )
57
60
import pandas .core .common as com
58
61
from pandas .core .indexers import (
59
62
check_array_indexer ,
@@ -171,7 +174,9 @@ def to_pyarrow_type(
171
174
return None
172
175
173
176
174
- class ArrowExtensionArray (OpsMixin , ExtensionArray , BaseStringArrayMethods ):
177
+ class ArrowExtensionArray (
178
+ OpsMixin , ExtensionArraySupportsAnyAll , BaseStringArrayMethods
179
+ ):
175
180
"""
176
181
Pandas ExtensionArray backed by a PyArrow ChunkedArray.
177
182
@@ -429,8 +434,6 @@ def __setstate__(self, state) -> None:
429
434
self .__dict__ .update (state )
430
435
431
436
def _cmp_method (self , other , op ):
432
- from pandas .arrays import BooleanArray
433
-
434
437
pc_func = ARROW_CMP_FUNCS [op .__name__ ]
435
438
if isinstance (other , ArrowExtensionArray ):
436
439
result = pc_func (self ._data , other ._data )
@@ -444,20 +447,13 @@ def _cmp_method(self, other, op):
444
447
valid = ~ mask
445
448
result = np .zeros (len (self ), dtype = "bool" )
446
449
result [valid ] = op (np .array (self )[valid ], other )
447
- return BooleanArray (result , mask )
450
+ result = pa .array (result , type = pa .bool_ ())
451
+ result = pc .if_else (valid , result , None )
448
452
else :
449
453
raise NotImplementedError (
450
454
f"{ op .__name__ } not implemented for { type (other )} "
451
455
)
452
-
453
- if result .null_count > 0 :
454
- # GH50524: avoid conversion to object for better perf
455
- values = pc .fill_null (result , False ).to_numpy ()
456
- mask = result .is_null ().to_numpy ()
457
- else :
458
- values = result .to_numpy ()
459
- mask = np .zeros (len (values ), dtype = np .bool_ )
460
- return BooleanArray (values , mask )
456
+ return ArrowExtensionArray (result )
461
457
462
458
def _evaluate_op_method (self , other , op , arrow_funcs ):
463
459
pa_type = self ._data .type
@@ -564,6 +560,122 @@ def isna(self) -> npt.NDArray[np.bool_]:
564
560
"""
565
561
return self ._data .is_null ().to_numpy ()
566
562
563
def any(self, *, skipna: bool = True, **kwargs):
    """
    Return whether any element is truthy.

    Returns False unless there is at least one element that is truthy.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be False, as for an empty array.
        If `skipna` is False, the result will still be True if there is
        at least one element that is truthy, otherwise NA will be returned
        if there are NA's present.
    **kwargs
        Additional keyword arguments, passed through unchanged to
        ``self._reduce``.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    ArrowExtensionArray.all : Return whether all elements are truthy.

    Examples
    --------
    The result indicates whether any element is truthy (and by default
    skips NAs):

    >>> pd.array([True, False, True], dtype="boolean[pyarrow]").any()
    True
    >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").any()
    True
    >>> pd.array([False, False, pd.NA], dtype="boolean[pyarrow]").any()
    False
    >>> pd.array([], dtype="boolean[pyarrow]").any()
    False
    >>> pd.array([pd.NA], dtype="boolean[pyarrow]").any()
    False
    >>> pd.array([pd.NA], dtype="float64[pyarrow]").any()
    False

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
    True
    >>> pd.array([1, 0, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
    True
    >>> pd.array([False, False, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
    <NA>
    >>> pd.array([0, 0, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
    <NA>
    """
    # The reduction itself lives in ``_reduce``; this method only selects
    # the "any" reduction and forwards the caller's options.
    return self._reduce("any", skipna=skipna, **kwargs)
620
+
621
def all(self, *, skipna: bool = True, **kwargs):
    """
    Return whether all elements are truthy.

    Returns True unless there is at least one element that is falsey.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be True, as for an empty array.
        If `skipna` is False, the result will still be False if there is
        at least one element that is falsey, otherwise NA will be returned
        if there are NA's present.
    **kwargs
        Additional keyword arguments, passed through unchanged to
        ``self._reduce``.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    ArrowExtensionArray.any : Return whether any element is truthy.

    Examples
    --------
    The result indicates whether all elements are truthy (and by default
    skips NAs):

    >>> pd.array([True, True, pd.NA], dtype="boolean[pyarrow]").all()
    True
    >>> pd.array([1, 1, pd.NA], dtype="boolean[pyarrow]").all()
    True
    >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").all()
    False
    >>> pd.array([], dtype="boolean[pyarrow]").all()
    True
    >>> pd.array([pd.NA], dtype="boolean[pyarrow]").all()
    True
    >>> pd.array([pd.NA], dtype="float64[pyarrow]").all()
    True

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, True, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
    <NA>
    >>> pd.array([1, 1, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
    <NA>
    >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
    False
    >>> pd.array([1, 0, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
    False
    """
    # The reduction itself lives in ``_reduce``; this method only selects
    # the "all" reduction and forwards the caller's options.
    return self._reduce("all", skipna=skipna, **kwargs)
678
+
567
679
def argsort (
568
680
self ,
569
681
* ,
0 commit comments