52
52
53
53
from pandas .core import roperator
54
54
from pandas .core .arraylike import OpsMixin
55
- from pandas .core .arrays .base import ExtensionArray
55
+ from pandas .core .arrays .base import (
56
+ ExtensionArray ,
57
+ ExtensionArraySupportsAnyAll ,
58
+ )
56
59
import pandas .core .common as com
57
60
from pandas .core .indexers import (
58
61
check_array_indexer ,
@@ -170,7 +173,9 @@ def to_pyarrow_type(
170
173
return None
171
174
172
175
173
- class ArrowExtensionArray (OpsMixin , ExtensionArray , BaseStringArrayMethods ):
176
+ class ArrowExtensionArray (
177
+ OpsMixin , ExtensionArraySupportsAnyAll , BaseStringArrayMethods
178
+ ):
174
179
"""
175
180
Pandas ExtensionArray backed by a PyArrow ChunkedArray.
176
181
@@ -438,8 +443,6 @@ def __setstate__(self, state) -> None:
438
443
self .__dict__ .update (state )
439
444
440
445
def _cmp_method (self , other , op ):
441
- from pandas .arrays import BooleanArray
442
-
443
446
pc_func = ARROW_CMP_FUNCS [op .__name__ ]
444
447
if isinstance (other , ArrowExtensionArray ):
445
448
result = pc_func (self ._data , other ._data )
@@ -453,20 +456,13 @@ def _cmp_method(self, other, op):
453
456
valid = ~ mask
454
457
result = np .zeros (len (self ), dtype = "bool" )
455
458
result [valid ] = op (np .array (self )[valid ], other )
456
- return BooleanArray (result , mask )
459
+ result = pa .array (result , type = pa .bool_ ())
460
+ result = pc .if_else (valid , result , None )
457
461
else :
458
462
raise NotImplementedError (
459
463
f"{ op .__name__ } not implemented for { type (other )} "
460
464
)
461
-
462
- if result .null_count > 0 :
463
- # GH50524: avoid conversion to object for better perf
464
- values = pc .fill_null (result , False ).to_numpy ()
465
- mask = result .is_null ().to_numpy ()
466
- else :
467
- values = result .to_numpy ()
468
- mask = np .zeros (len (values ), dtype = np .bool_ )
469
- return BooleanArray (values , mask )
465
+ return ArrowExtensionArray (result )
470
466
471
467
def _evaluate_op_method (self , other , op , arrow_funcs ):
472
468
pa_type = self ._data .type
@@ -580,6 +576,122 @@ def isna(self) -> npt.NDArray[np.bool_]:
580
576
581
577
return self ._data .is_null ().to_numpy ()
582
578
579
def any(self, *, skipna: bool = True, **kwargs):
    """
    Report whether at least one element of the array is truthy.

    The result is False unless a truthy element is found. Missing values
    are skipped by default. When ``skipna=False`` is given and missing
    values are present, the reduction follows
    :ref:`Kleene logic <boolean.kleene>`, in the same way as the logical
    operations do.

    Parameters
    ----------
    skipna : bool, default True
        Whether to exclude NA values. With ``skipna=True`` an array made
        up entirely of NA reduces to False, exactly like an empty array.
        With ``skipna=False`` a single truthy element still yields True;
        otherwise NA is returned whenever NA values are present.
    **kwargs
        Additional keyword arguments, forwarded unchanged to
        ``self._reduce``.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    ArrowExtensionArray.all : Return whether all elements are truthy.

    Examples
    --------
    By default NAs are skipped, so only the non-missing elements decide
    the outcome:

    >>> pd.array([True, False, True], dtype="boolean[pyarrow]").any()
    True
    >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").any()
    True
    >>> pd.array([False, False, pd.NA], dtype="boolean[pyarrow]").any()
    False
    >>> pd.array([], dtype="boolean[pyarrow]").any()
    False
    >>> pd.array([pd.NA], dtype="boolean[pyarrow]").any()
    False
    >>> pd.array([pd.NA], dtype="float64[pyarrow]").any()
    False

    With ``skipna=False`` the answer is NA whenever it would depend on
    whether ``pd.NA`` stands for True or False:

    >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
    True
    >>> pd.array([1, 0, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
    True
    >>> pd.array([False, False, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
    <NA>
    >>> pd.array([0, 0, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
    <NA>
    """
    # ``skipna`` is keyword-only in the signature, so it can never also be
    # present in ``kwargs``; merging the two cannot produce a duplicate key.
    reduce_kwargs = {"skipna": skipna, **kwargs}
    # Hand the work to ``_reduce`` under the reduction name "any".
    return self._reduce("any", **reduce_kwargs)
636
+
637
def all(self, *, skipna: bool = True, **kwargs):
    """
    Report whether every element of the array is truthy.

    The result is True unless a falsey element is found. Missing values
    are skipped by default. When ``skipna=False`` is given and missing
    values are present, the reduction follows
    :ref:`Kleene logic <boolean.kleene>`, in the same way as the logical
    operations do.

    Parameters
    ----------
    skipna : bool, default True
        Whether to exclude NA values. With ``skipna=True`` an array made
        up entirely of NA reduces to True, exactly like an empty array.
        With ``skipna=False`` a single falsey element still yields False;
        otherwise NA is returned whenever NA values are present.
    **kwargs
        Additional keyword arguments, forwarded unchanged to
        ``self._reduce``.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    ArrowExtensionArray.any : Return whether any element is truthy.

    Examples
    --------
    By default NAs are skipped, so only the non-missing elements decide
    the outcome:

    >>> pd.array([True, True, pd.NA], dtype="boolean[pyarrow]").all()
    True
    >>> pd.array([1, 1, pd.NA], dtype="boolean[pyarrow]").all()
    True
    >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").all()
    False
    >>> pd.array([], dtype="boolean[pyarrow]").all()
    True
    >>> pd.array([pd.NA], dtype="boolean[pyarrow]").all()
    True
    >>> pd.array([pd.NA], dtype="float64[pyarrow]").all()
    True

    With ``skipna=False`` the answer is NA whenever it would depend on
    whether ``pd.NA`` stands for True or False:

    >>> pd.array([True, True, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
    <NA>
    >>> pd.array([1, 1, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
    <NA>
    >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
    False
    >>> pd.array([1, 0, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
    False
    """
    # ``skipna`` is keyword-only in the signature, so it can never also be
    # present in ``kwargs``; merging the two cannot produce a duplicate key.
    reduce_kwargs = {"skipna": skipna, **kwargs}
    # Hand the work to ``_reduce`` under the reduction name "all".
    return self._reduce("all", **reduce_kwargs)
694
+
583
695
def argsort (
584
696
self ,
585
697
* ,
0 commit comments