Skip to content

Commit 2a08b05

Browse files
authored
Faster ensure_string_array (#55183)
1 parent 28f274c commit 2a08b05

File tree

1 file changed

+32
-4
lines changed

1 file changed

+32
-4
lines changed

pandas/_libs/lib.pyx

+32-4
Original file line number | Diff line number | Diff line change
@@ -776,6 +776,7 @@ cpdef ndarray[object] ensure_string_array(
776776
cdef:
777777
Py_ssize_t i = 0, n = len(arr)
778778
bint already_copied = True
779+
ndarray[object] newarr
779780

780781
if hasattr(arr, "to_numpy"):
781782

@@ -800,8 +801,30 @@ cpdef ndarray[object] ensure_string_array(
800801
# short-circuit, all elements are str
801802
return result
802803

804+
if arr.dtype.kind == "f": # non-optimized path
805+
for i in range(n):
806+
val = arr[i]
807+
808+
if not already_copied:
809+
result = result.copy()
810+
already_copied = True
811+
812+
if not checknull(val):
813+
# f"{val}" is not always equivalent to str(val) for floats
814+
result[i] = str(val)
815+
else:
816+
if convert_na_value:
817+
val = na_value
818+
if skipna:
819+
result[i] = val
820+
else:
821+
result[i] = f"{val}"
822+
823+
return result
824+
825+
newarr = np.asarray(arr, dtype=object)
803826
for i in range(n):
804-
val = arr[i]
827+
val = newarr[i]
805828

806829
if isinstance(val, str):
807830
continue
@@ -2831,9 +2854,14 @@ NoDefault = Literal[_NoDefault.no_default]
28312854

28322855
@cython.boundscheck(False)
28332856
@cython.wraparound(False)
2834-
def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
2835-
object na_value=no_default, cnp.dtype dtype=np.dtype(object)
2836-
) -> np.ndarray:
2857+
def map_infer_mask(
2858+
ndarray[object] arr,
2859+
object f,
2860+
const uint8_t[:] mask,
2861+
bint convert=True,
2862+
object na_value=no_default,
2863+
cnp.dtype dtype=np.dtype(object)
2864+
) -> np.ndarray:
28372865
"""
28382866
Substitute for np.vectorize with pandas-friendly dtype inference.
28392867

0 commit comments

Comments
 (0)