@@ -93,9 +93,6 @@ class StringArray(PandasArray):
93
93
StringArray is considered experimental. The implementation and
94
94
parts of the API may change without warning.
95
95
96
- In particular, the NA value used may change to no longer be
97
- ``numpy.nan``.
98
-
99
96
Parameters
100
97
----------
101
98
values : array-like
@@ -104,8 +101,11 @@ class StringArray(PandasArray):
104
101
.. warning::
105
102
106
103
Currently, this expects an object-dtype ndarray
107
- where the elements are Python strings. This may
108
- change without warning in the future.
104
+ where the elements are Python strings or :attr:`pandas.NA`.
105
+ This may change without warning in the future. Use
106
+ :meth:`pandas.array` with ``dtype="string"`` for a stable way of
107
+ creating a `StringArray` from any sequence.
108
+
109
109
copy : bool, default False
110
110
Whether to copy the array of data.
111
111
@@ -119,6 +119,8 @@ class StringArray(PandasArray):
119
119
120
120
See Also
121
121
--------
122
+ array
123
+ The recommended function for creating a StringArray.
122
124
Series.str
123
125
The string methods are available on Series backed by
124
126
a StringArray.
@@ -165,25 +167,33 @@ def __init__(self, values, copy=False):
165
167
def _validate(self):
    """
    Validate that we only store NA or strings.

    Raises
    ------
    ValueError
        If the backing ``_ndarray`` is non-empty and
        ``lib.is_string_array(..., skipna=True)`` rejects it, or if its
        dtype is not ``object``.
    """
    # A zero-length array skips the element scan; only its dtype is
    # checked below.
    if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
        raise ValueError("StringArray requires a sequence of strings or pandas.NA")
    if self._ndarray.dtype != "object":
        raise ValueError(
            "StringArray requires a sequence of strings or pandas.NA. Got "
            f"'{self._ndarray.dtype}' dtype instead."
        )
176
176
177
177
@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """
    Construct an instance of ``cls`` from a sequence of scalars.

    Parameters
    ----------
    scalars : sequence
        Values to store. They are converted with
        ``np.asarray(scalars, dtype="object")``.
    dtype : optional
        When truthy, it must compare equal to ``"string"``.
    copy : bool, default False
        Whether to copy the data when the conversion handed back the very
        same object as ``scalars``.

    Returns
    -------
    The result of calling ``cls`` on the converted object array, with every
    element flagged by ``isna`` replaced by ``StringDtype.na_value``.
    """
    if dtype:
        assert dtype == "string"

    result = np.asarray(scalars, dtype="object")
    if copy and result is scalars:
        result = result.copy()

    # Standardize all missing-like values to NA
    # TODO: it would be nice to do this in _validate / lib.is_string_array
    # We are already doing a scan over the values there.
    na_values = isna(result)
    if na_values.any():
        # NOTE(review): ``result is scalars`` is an identity check only. If
        # ``np.asarray`` returns a distinct array that still shares memory
        # with ``scalars``, the assignment below would write through to the
        # caller's data -- confirm this cannot happen for supported inputs.
        if result is scalars:
            # force a copy now, if we haven't already
            result = result.copy()
        result[na_values] = StringDtype.na_value

    return cls(result)
187
197
188
198
@classmethod
189
199
def _from_sequence_of_strings (cls , strings , dtype = None , copy = False ):
0 commit comments