Skip to content

Commit 20817a7

Browse files
committed
address comments
1 parent 2426319 commit 20817a7

File tree

2 files changed

+45
-10
lines changed

2 files changed

+45
-10
lines changed

pandas/_libs/lib.pyx

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,14 @@ def astype_intsafe(ndarray[object] arr, cnp.dtype new_dtype) -> ndarray:
671671
return result
672672

673673

674+
ctypedef enum coerce_options:
675+
all = 0
676+
strict_null = 1
677+
null = 2
678+
non_null = 3
679+
none = 4
680+
681+
674682
@cython.wraparound(False)
675683
@cython.boundscheck(False)
676684
cpdef ndarray[object] ensure_string_array(
@@ -689,11 +697,11 @@ cpdef ndarray[object] ensure_string_array(
689697
The values to be converted to str, if needed.
690698
na_value : Any, default np.nan
691699
The value to use for na. For example, np.nan or pd.NA.
692-
coerce : {'all', 'null', 'non-null', None}, default 'all'
700+
coerce : {'all', 'strict-null', 'null', 'non-null', None}, default 'all'
693701
Whether to coerce non-string elements to strings.
694-
- 'all' will convert null values and non-null non-string values.
702+
- 'all' will convert all non-string values.
695703
- 'strict-null' will only convert pd.NA, np.nan, or None to na_value
696-
without converting other non-strings.
704+
and will raise when encountering other non-strings, including other null values.
697705
- 'null' will convert nulls to na_value without converting other non-strings.
698706
- 'non-null' will only convert non-null non-string elements to string.
699707
- None will not convert anything.
@@ -715,13 +723,40 @@ cpdef ndarray[object] ensure_string_array(
715723
ValueError
716724
If an element is encountered that is not a string or valid NA value
717725
and the element is not coerced.
726+
727+
Examples
728+
--------
729+
>>> import numpy as np
730+
>>> import pandas as pd
731+
>>> ensure_string_array(np.array([1,2,3, np.datetime64("nat")]), coerce="all")
732+
array(['1', '2', '3', nan], dtype=object)
733+
>>> ensure_string_array(np.array([pd.NA, "a", None]), coerce="strict-null")
734+
array([nan, 'a', nan], dtype=object)
735+
>>> ensure_string_array(np.array([pd.NaT, "1"]), coerce="null")
736+
array([nan, '1'], dtype=object)
737+
>>> ensure_string_array(np.array([1,2,3]), coerce="non-null")
738+
array(['1', '2', '3'], dtype=object)
739+
>>> ensure_string_array(np.array(["1", "2", "3"]), coerce=None)
740+
array(['1', '2', '3'], dtype=object)
718741
"""
719-
if coerce not in {"all", "strict-null", "null", "non-null", None}:
720-
raise ValueError("coerce argument must be one of "
721-
f"'all'|'strict-null'|'null'|'non-null'|None, not {coerce}")
722742
cdef:
723743
Py_ssize_t i = 0, n = len(arr)
724744
set strict_na_values = {C_NA, np.nan, None}
745+
coerce_options coerce_val
746+
747+
if coerce == "all":
748+
coerce_val = all
749+
elif coerce == "strict-null":
750+
coerce_val = strict_null
751+
elif coerce == "null":
752+
coerce_val = null
753+
elif coerce == "non-null":
754+
coerce_val = non_null
755+
elif coerce is None:
756+
coerce_val = none
757+
else:
758+
raise ValueError("coerce argument must be one of "
759+
f"'all'|'strict-null'|'null'|'non-null'|None, not {coerce}")
725760

726761
if hasattr(arr, "to_numpy"):
727762

@@ -741,7 +776,7 @@ cpdef ndarray[object] ensure_string_array(
741776
if copy and result is arr:
742777
result = result.copy()
743778

744-
if coerce == 'strict-null':
779+
if coerce_val == strict_null:
745780
# We don't use checknull, since NaT, Decimal("NaN"), etc. aren't valid
746781
# If they are present, they are treated like a regular Python object
747782
# and will either cause an exception to be raised or be coerced.
@@ -756,7 +791,7 @@ cpdef ndarray[object] ensure_string_array(
756791
continue
757792

758793
if not check_null(val):
759-
if coerce =="all" or coerce == "non-null":
794+
if coerce_val == all or coerce_val == non_null:
760795
if not isinstance(val, np.floating):
761796
# f"{val}" is faster than str(val)
762797
result[i] = f"{val}"
@@ -768,7 +803,7 @@ cpdef ndarray[object] ensure_string_array(
768803
"If you want it to be coerced to a string,"
769804
"specify coerce='all'")
770805
else:
771-
if coerce=="all" or coerce == "null" or coerce == 'strict-null':
806+
if coerce_val != non_null and coerce_val != none:
772807
val = na_value
773808
if skipna:
774809
result[i] = val

pandas/core/arrays/string_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ class StringArray(BaseStringArray, PandasArray):
252252
:meth:`pandas.array` with ``dtype="string"`` for a stable way of
253253
creating a `StringArray` from any sequence.
254254
255-
.. versionchanged:: 1.3
255+
.. versionchanged:: 1.4.0
256256
257257
StringArray now accepts nan-likes (``None``, ``np.nan``) for the
258258
``values`` parameter in its constructor

0 commit comments

Comments
 (0)