Skip to content

Commit 0851ac3

Browse files
authored
REF (string): Move StringArrayNumpySemantics methods to base class (#59514)
* REF (string): Move StringArrayNumpySemantics methods to base class
* mypy fixup
1 parent d36c589 commit 0851ac3

File tree

1 file changed: +23 -33 lines changed

pandas/core/arrays/string_.py

+23 -33
Original file line number | Diff line number | Diff line change
@@ -746,6 +746,12 @@ def _reduce(
746746
axis: AxisInt | None = 0,
747747
**kwargs,
748748
):
749+
if self.dtype.na_value is np.nan and name in ["any", "all"]:
750+
if name == "any":
751+
return nanops.nanany(self._ndarray, skipna=skipna)
752+
else:
753+
return nanops.nanall(self._ndarray, skipna=skipna)
754+
749755
if name in ["min", "max"]:
750756
result = getattr(self, name)(skipna=skipna, axis=axis)
751757
if keepdims:
@@ -754,6 +760,12 @@ def _reduce(
754760

755761
raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
756762

763+
def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
764+
if self.dtype.na_value is np.nan and result is libmissing.NA:
765+
# the masked_reductions use pd.NA -> convert to np.nan
766+
return np.nan
767+
return super()._wrap_reduction_result(axis, result)
768+
757769
def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
758770
nv.validate_min((), kwargs)
759771
result = masked_reductions.min(
@@ -771,8 +783,11 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
771783
def value_counts(self, dropna: bool = True) -> Series:
772784
from pandas.core.algorithms import value_counts_internal as value_counts
773785

774-
result = value_counts(self._ndarray, sort=False, dropna=dropna).astype("Int64")
786+
result = value_counts(self._ndarray, sort=False, dropna=dropna)
775787
result.index = result.index.astype(self.dtype)
788+
789+
if self.dtype.na_value is libmissing.NA:
790+
result = result.astype("Int64")
776791
return result
777792

778793
def memory_usage(self, deep: bool = False) -> int:
@@ -823,7 +838,13 @@ def _cmp_method(self, other, op):
823838
# logical
824839
result = np.zeros(len(self._ndarray), dtype="bool")
825840
result[valid] = op(self._ndarray[valid], other)
826-
return BooleanArray(result, mask)
841+
res_arr = BooleanArray(result, mask)
842+
if self.dtype.na_value is np.nan:
843+
if op == operator.ne:
844+
return res_arr.to_numpy(np.bool_, na_value=True)
845+
else:
846+
return res_arr.to_numpy(np.bool_, na_value=False)
847+
return res_arr
827848

828849
_arith_method = _cmp_method
829850

@@ -864,37 +885,6 @@ def _from_backing_data(self, arr: np.ndarray) -> StringArrayNumpySemantics:
864885
# we always preserve the dtype
865886
return NDArrayBacked._from_backing_data(self, arr)
866887

867-
def _reduce(
868-
self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
869-
):
870-
if name in ["any", "all"]:
871-
if name == "any":
872-
return nanops.nanany(self._ndarray, skipna=skipna)
873-
else:
874-
return nanops.nanall(self._ndarray, skipna=skipna)
875-
else:
876-
return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
877-
878-
def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
879-
# the masked_reductions use pd.NA
880-
if result is libmissing.NA:
881-
return np.nan
882-
return super()._wrap_reduction_result(axis, result)
883-
884-
def _cmp_method(self, other, op):
885-
result = super()._cmp_method(other, op)
886-
if op == operator.ne:
887-
return result.to_numpy(np.bool_, na_value=True)
888-
else:
889-
return result.to_numpy(np.bool_, na_value=False)
890-
891-
def value_counts(self, dropna: bool = True) -> Series:
892-
from pandas.core.algorithms import value_counts_internal as value_counts
893-
894-
result = value_counts(self._ndarray, sort=False, dropna=dropna)
895-
result.index = result.index.astype(self.dtype)
896-
return result
897-
898888
# ------------------------------------------------------------------------
899889
# String methods interface
900890
_str_na_value = np.nan

0 commit comments

Comments (0)