Skip to content

Commit b1ee91f

Browse files
jbrockmendel authored and jorisvandenbossche committed
REF (string): Move StringArrayNumpySemantics methods to base class (#59514)
* REF (string): Move StringArrayNumpySemantics methods to base class
* mypy fixup
1 parent 6239d5d commit b1ee91f

File tree

1 file changed

+23
-32
lines changed

1 file changed

+23
-32
lines changed

pandas/core/arrays/string_.py

+23-32
Original file line number | Diff line number | Diff line change
@@ -738,11 +738,23 @@ def astype(self, dtype, copy: bool = True):
738738
def _reduce(
739739
self, name: str, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs
740740
):
741+
if self.dtype.na_value is np.nan and name in ["any", "all"]:
742+
if name == "any":
743+
return nanops.nanany(self._ndarray, skipna=skipna)
744+
else:
745+
return nanops.nanall(self._ndarray, skipna=skipna)
746+
741747
if name in ["min", "max"]:
742748
return getattr(self, name)(skipna=skipna, axis=axis)
743749

744750
raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
745751

752+
def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
753+
if self.dtype.na_value is np.nan and result is libmissing.NA:
754+
# the masked_reductions use pd.NA -> convert to np.nan
755+
return np.nan
756+
return super()._wrap_reduction_result(axis, result)
757+
746758
def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
747759
nv.validate_min((), kwargs)
748760
result = masked_reductions.min(
@@ -761,7 +773,11 @@ def value_counts(self, dropna: bool = True) -> Series:
761773
from pandas.core.algorithms import value_counts_internal as value_counts
762774

763775
result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
776+
result = value_counts(self._ndarray, sort=False, dropna=dropna)
764777
result.index = result.index.astype(self.dtype)
778+
779+
if self.dtype.na_value is libmissing.NA:
780+
result = result.astype("Int64")
765781
return result
766782

767783
def memory_usage(self, deep: bool = False) -> int:
@@ -812,7 +828,13 @@ def _cmp_method(self, other, op):
812828
# logical
813829
result = np.zeros(len(self._ndarray), dtype="bool")
814830
result[valid] = op(self._ndarray[valid], other)
815-
return BooleanArray(result, mask)
831+
res_arr = BooleanArray(result, mask)
832+
if self.dtype.na_value is np.nan:
833+
if op == operator.ne:
834+
return res_arr.to_numpy(np.bool_, na_value=True)
835+
else:
836+
return res_arr.to_numpy(np.bool_, na_value=False)
837+
return res_arr
816838

817839
_arith_method = _cmp_method
818840

@@ -853,37 +875,6 @@ def _from_backing_data(self, arr: np.ndarray) -> StringArrayNumpySemantics:
853875
# we always preserve the dtype
854876
return NDArrayBacked._from_backing_data(self, arr)
855877

856-
def _reduce(
857-
self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
858-
):
859-
if name in ["any", "all"]:
860-
if name == "any":
861-
return nanops.nanany(self._ndarray, skipna=skipna)
862-
else:
863-
return nanops.nanall(self._ndarray, skipna=skipna)
864-
else:
865-
return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
866-
867-
def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
868-
# the masked_reductions use pd.NA
869-
if result is libmissing.NA:
870-
return np.nan
871-
return super()._wrap_reduction_result(axis, result)
872-
873-
def _cmp_method(self, other, op):
874-
result = super()._cmp_method(other, op)
875-
if op == operator.ne:
876-
return result.to_numpy(np.bool_, na_value=True)
877-
else:
878-
return result.to_numpy(np.bool_, na_value=False)
879-
880-
def value_counts(self, dropna: bool = True) -> Series:
881-
from pandas.core.algorithms import value_counts_internal as value_counts
882-
883-
result = value_counts(self._ndarray, sort=False, dropna=dropna)
884-
result.index = result.index.astype(self.dtype)
885-
return result
886-
887878
# ------------------------------------------------------------------------
888879
# String methods interface
889880
_str_na_value = np.nan

0 commit comments

Comments
 (0)