Skip to content

Commit ecc451d

Browse files
authored
REF (string): de-duplicate str_map_nan_semantics (#59464)
REF: de-duplicate str_map_nan_semantics
1 parent c831ccd commit ecc451d

File tree

2 files changed

+5
-46
lines changed

2 files changed

+5
-46
lines changed

pandas/core/arrays/string_.py

+5 -4
Original file line number | Diff line number | Diff line change
@@ -395,7 +395,7 @@ def _str_map(
395395
return constructor(result, mask)
396396

397397
else:
398-
return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
398+
return self._str_map_str_or_object(dtype, na_value, arr, f, mask)
399399

400400
def _str_map_str_or_object(
401401
self,
@@ -404,7 +404,6 @@ def _str_map_str_or_object(
404404
arr: np.ndarray,
405405
f,
406406
mask: npt.NDArray[np.bool_],
407-
convert: bool,
408407
):
409408
# _str_map helper for case where dtype is either string dtype or object
410409
if is_string_dtype(dtype) and not is_object_dtype(dtype):
@@ -438,7 +437,6 @@ def _str_map_nan_semantics(
438437

439438
mask = isna(self)
440439
arr = np.asarray(self)
441-
convert = convert and not np.all(mask)
442440

443441
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
444442
na_value_is_na = isna(na_value)
@@ -457,6 +455,9 @@ def _str_map_nan_semantics(
457455
dtype=np.dtype(cast(type, dtype)),
458456
)
459457
if na_value_is_na and mask.any():
458+
# TODO: we could alternatively do this check before map_infer_mask
459+
# and adjust the dtype/na_value we pass there. Which is more
460+
# performant?
460461
if is_integer_dtype(dtype):
461462
result = result.astype("float64")
462463
else:
@@ -465,7 +466,7 @@ def _str_map_nan_semantics(
465466
return result
466467

467468
else:
468-
return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
469+
return self._str_map_str_or_object(dtype, na_value, arr, f, mask)
469470

470471

471472
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is

pandas/core/arrays/string_arrow.py

-42
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
from typing import (
77
TYPE_CHECKING,
88
Union,
9-
cast,
109
)
1110

1211
import numpy as np
@@ -23,8 +22,6 @@
2322
)
2423

2524
from pandas.core.dtypes.common import (
26-
is_bool_dtype,
27-
is_integer_dtype,
2825
is_scalar,
2926
pandas_dtype,
3027
)
@@ -281,45 +278,6 @@ def astype(self, dtype, copy: bool = True):
281278

282279
_str_map = BaseStringArray._str_map
283280

284-
def _str_map_nan_semantics(
285-
self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
286-
):
287-
if dtype is None:
288-
dtype = self.dtype
289-
if na_value is None:
290-
na_value = self.dtype.na_value
291-
292-
mask = isna(self)
293-
arr = np.asarray(self)
294-
295-
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
296-
if is_integer_dtype(dtype):
297-
na_value = np.nan
298-
else:
299-
na_value = False
300-
301-
dtype = np.dtype(cast(type, dtype))
302-
if mask.any():
303-
# numpy int/bool dtypes cannot hold NaNs so we must convert to
304-
# float64 for int (to match maybe_convert_objects) or
305-
# object for bool (again to match maybe_convert_objects)
306-
if is_integer_dtype(dtype):
307-
dtype = np.dtype("float64")
308-
else:
309-
dtype = np.dtype(object)
310-
result = lib.map_infer_mask(
311-
arr,
312-
f,
313-
mask.view("uint8"),
314-
convert=False,
315-
na_value=na_value,
316-
dtype=dtype,
317-
)
318-
return result
319-
320-
else:
321-
return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
322-
323281
def _str_contains(
324282
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
325283
):

0 commit comments

Comments
 (0)