Skip to content

Commit ac7ee27

Browse files
committed
Use ensure_string_array also in construct_1d_ndarray_preserving_na
1 parent df8e4d6 commit ac7ee27

File tree

3 files changed

+41
-22
lines changed

3 files changed

+41
-22
lines changed

pandas/_libs/lib.pyx

+36-8
Original file line numberDiff line numberDiff line change
@@ -1698,18 +1698,46 @@ cpdef bint is_string_array(ndarray values, bint skipna=False):
16981698
return validator.validate(values)
16991699

17001700

1701-
cpdef ndarray ensure_string_array(ndarray values, object na_value):
1701+
cpdef ndarray ensure_string_array(
1702+
values, object na_value=np.nan, bint convert_na_value=True, bint copy=True):
1703+
"""Returns a new numpy array with object dtype and only strings and na values.
1704+
1705+
Parameters
1706+
----------
1707+
values : array-like
1708+
The values to be converted to str, if needed
1709+
na_value : Any
1710+
The value to use for na. For example, np.nan or pd.NA
1711+
convert_na_value : bool, default True
1712+
If False, existing na values will be used unchanged in the new array
1713+
copy : bool, default True
1714+
Whether to ensure that a new array is returned
1715+
1716+
Returns
1717+
-------
1718+
ndarray
1719+
"""
17021720
cdef:
17031721
Py_ssize_t i = 0, n = len(values)
17041722

1705-
for i in range(n):
1706-
val = values[i]
1707-
if not checknull(val):
1708-
values[i] = str(val)
1709-
else:
1710-
values[i] = na_value
1723+
result = np.asarray(values, dtype="object")
1724+
if copy and result is values:
1725+
result = result.copy()
17111726

1712-
return values
1727+
if convert_na_value:
1728+
for i in range(n):
1729+
val = result[i]
1730+
if not checknull(val):
1731+
result[i] = str(val)
1732+
else:
1733+
result[i] = na_value
1734+
else:
1735+
for i in range(n):
1736+
val = result[i]
1737+
if not checknull(val):
1738+
result[i] = str(val)
1739+
1740+
return result
17131741

17141742

17151743
cdef class BytesValidator(Validator):

pandas/core/arrays/string_.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,10 @@ def _from_sequence(cls, scalars, dtype=None, copy=True):
199199
assert dtype == "string"
200200

201201
result = np.asarray(scalars, dtype="object")
202-
if copy and result is scalars:
203-
result = result.copy()
204202

205203
# convert non-na-likes to str, and nan-likes to StringDtype.na_value
206-
result = lib.ensure_string_array(result, StringDtype.na_value)
204+
result = lib.ensure_string_array(
205+
result, na_value=StringDtype.na_value, copy=copy)
207206

208207
return cls(result)
209208

pandas/core/dtypes/cast.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -1608,19 +1608,11 @@ def construct_1d_ndarray_preserving_na(
16081608
>>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype=np.dtype('str'))
16091609
array(['1.0', '2.0', None], dtype=object)
16101610
"""
1611-
subarr = np.array(values, dtype=dtype, copy=copy)
16121611

16131612
if dtype is not None and dtype.kind == "U":
1614-
# GH-21083
1615-
# We can't just return np.array(subarr, dtype='str') since
1616-
# NumPy will convert the non-string objects into strings
1617-
# Including NA values. So we have to go
1618-
# string -> object -> update NA, which requires an
1619-
# additional pass over the data.
1620-
na_values = isna(values)
1621-
subarr2 = subarr.astype(object)
1622-
subarr2[na_values] = np.asarray(values, dtype=object)[na_values]
1623-
subarr = subarr2
1613+
subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy)
1614+
else:
1615+
subarr = np.array(values, dtype=dtype, copy=copy)
16241616

16251617
return subarr
16261618

0 commit comments

Comments
 (0)