@@ -738,11 +738,23 @@ def astype(self, dtype, copy: bool = True):
738
738
def _reduce(
    self, name: str, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs
):
    """
    Dispatch the reduction called ``name`` for this string array.

    Parameters
    ----------
    name : str
        Reduction to perform. ``"any"`` and ``"all"`` are handled only when
        the dtype's ``na_value`` is ``np.nan``; ``"min"`` and ``"max"`` are
        always handled.
    skipna : bool, default True
        Forwarded unchanged to whichever implementation is selected.
    axis : int or None, default 0
        Forwarded to ``min``/``max``; not used for ``"any"``/``"all"``.
    **kwargs
        Accepted by the signature but not forwarded by this method.

    Returns
    -------
    object
        Whatever the selected reduction returns.

    Raises
    ------
    TypeError
        If ``name`` is not handled by any of the branches below.
    """
    # Truthiness reductions are only offered for the NaN-flavoured dtype and
    # operate directly on the backing ndarray.
    nan_flavoured = self.dtype.na_value is np.nan
    if nan_flavoured and name == "any":
        return nanops.nanany(self._ndarray, skipna=skipna)
    if nan_flavoured and name == "all":
        return nanops.nanall(self._ndarray, skipna=skipna)

    # min/max are delegated to the same-named methods on this array.
    if name in ("min", "max"):
        reducer = getattr(self, name)
        return reducer(skipna=skipna, axis=axis)

    # NOTE(review): message text reproduced exactly as found, including the
    # space before the closing quote -- confirm against tests matching on it.
    raise TypeError(f"Cannot perform reduction '{ name } ' with string dtype")
745
751
752
def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
    """
    Post-process a reduction result before handing it back to the caller.

    Parameters
    ----------
    axis : int or None
        Passed through to the parent implementation.
    result : object
        Raw value produced by the reduction.

    Returns
    -------
    object
        ``np.nan`` when this array's dtype uses ``np.nan`` as its missing
        value and ``result`` is ``pd.NA``; otherwise whatever the parent
        class's ``_wrap_reduction_result`` returns.
    """
    uses_nan = self.dtype.na_value is np.nan
    got_pd_na = result is libmissing.NA
    if uses_nan and got_pd_na:
        # The masked reductions report a missing result as pd.NA; translate
        # it to the NaN sentinel this dtype uses.
        return np.nan
    return super()._wrap_reduction_result(axis, result)
757
+
746
758
def min (self , axis = None , skipna : bool = True , ** kwargs ) -> Scalar :
747
759
nv .validate_min ((), kwargs )
748
760
result = masked_reductions .min (
@@ -761,7 +773,11 @@ def value_counts(self, dropna: bool = True) -> Series:
761
773
from pandas .core .algorithms import value_counts_internal as value_counts
762
774
763
775
result = value_counts (self ._ndarray , dropna = dropna ).astype ("Int64" )
776
+ result = value_counts (self ._ndarray , sort = False , dropna = dropna )
764
777
result .index = result .index .astype (self .dtype )
778
+
779
+ if self .dtype .na_value is libmissing .NA :
780
+ result = result .astype ("Int64" )
765
781
return result
766
782
767
783
def memory_usage (self , deep : bool = False ) -> int :
@@ -812,7 +828,13 @@ def _cmp_method(self, other, op):
812
828
# logical
813
829
result = np .zeros (len (self ._ndarray ), dtype = "bool" )
814
830
result [valid ] = op (self ._ndarray [valid ], other )
815
- return BooleanArray (result , mask )
831
+ res_arr = BooleanArray (result , mask )
832
+ if self .dtype .na_value is np .nan :
833
+ if op == operator .ne :
834
+ return res_arr .to_numpy (np .bool_ , na_value = True )
835
+ else :
836
+ return res_arr .to_numpy (np .bool_ , na_value = False )
837
+ return res_arr
816
838
817
839
# Arithmetic operators are bound to the same implementation as comparisons.
_arith_method = _cmp_method
818
840
@@ -853,37 +875,6 @@ def _from_backing_data(self, arr: np.ndarray) -> StringArrayNumpySemantics:
853
875
# we always preserve the dtype
854
876
return NDArrayBacked ._from_backing_data (self , arr )
855
877
856
def _reduce(
    self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
):
    """
    Dispatch the reduction called ``name``, handling ``any``/``all`` locally.

    Parameters
    ----------
    name : str
        Reduction to perform.
    skipna : bool, default True
        Forwarded to the selected implementation.
    keepdims : bool, default False
        Forwarded to the parent class; not used for ``"any"``/``"all"``.
    **kwargs
        Forwarded to the parent class; not used for ``"any"``/``"all"``.

    Returns
    -------
    object
        Result of ``nanops.nanany``/``nanops.nanall`` on the backing ndarray
        for ``"any"``/``"all"``, otherwise the parent class's result.
    """
    if name == "any":
        return nanops.nanany(self._ndarray, skipna=skipna)
    if name == "all":
        return nanops.nanall(self._ndarray, skipna=skipna)
    # Every other reduction is left to the parent class.
    return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
866
-
867
def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
    """
    Post-process a reduction result, replacing ``pd.NA`` with ``np.nan``.

    Parameters
    ----------
    axis : int or None
        Passed through to the parent implementation.
    result : object
        Raw value produced by the reduction.

    Returns
    -------
    object
        ``np.nan`` if ``result`` is ``pd.NA`` (the masked reductions use
        ``pd.NA``); otherwise the parent class's wrapped result.
    """
    return (
        np.nan
        if result is libmissing.NA
        else super()._wrap_reduction_result(axis, result)
    )
872
-
873
def _cmp_method(self, other, op):
    """
    Compare with ``other`` and return a plain NumPy boolean array.

    The parent class's ``_cmp_method`` is run first; its result is then
    converted with ``to_numpy(np.bool_, na_value=...)``, where the fill
    value is ``True`` when ``op`` is ``operator.ne`` and ``False`` for every
    other operator.

    Parameters
    ----------
    other : object
        Right-hand operand, passed through to the parent implementation.
    op : callable
        Operator being applied, passed through to the parent implementation.

    Returns
    -------
    object
        Whatever ``to_numpy`` returns for the parent's result.
    """
    parent_result = super()._cmp_method(other, op)
    # "Not equal" is the only operator whose fill value is True.
    fill = True if op == operator.ne else False
    return parent_result.to_numpy(np.bool_, na_value=fill)
879
-
880
- def value_counts (self , dropna : bool = True ) -> Series :
881
- from pandas .core .algorithms import value_counts_internal as value_counts
882
-
883
- result = value_counts (self ._ndarray , sort = False , dropna = dropna )
884
- result .index = result .index .astype (self .dtype )
885
- return result
886
-
887
878
# ------------------------------------------------------------------------
888
879
# String methods interface
889
880
# NOTE(review): presumably the missing-value marker returned by the string
# methods of this class -- confirm against the _str_* methods that read it.
_str_na_value = np .nan
0 commit comments