1
1
import operator
2
- from typing import TYPE_CHECKING , Optional , Type , Union
2
+ from typing import TYPE_CHECKING , Type , Union
3
3
4
4
import numpy as np
5
5
@@ -122,9 +122,6 @@ class StringArray(PandasArray):
122
122
123
123
copy : bool, default False
124
124
Whether to copy the array of data.
125
- convert : bool, default False
126
- If true, force conversion of non-na scalars to strings.
127
- If False, raises a ValueError, if a scalar is neither a string nor na.
128
125
129
126
Attributes
130
127
----------
@@ -165,15 +162,7 @@ class StringArray(PandasArray):
165
162
['1', '1']
166
163
Length: 2, dtype: string
167
164
168
- Instantiating StringArrays directly with non-strings arrays will raise an error
169
- unless ``convert=True``.
170
-
171
- >>> pd.arrays.StringArray(np.array(['1', 1]))
172
- ValueError: StringArray requires a sequence of strings or pandas.NA
173
- >>> pd.arrays.StringArray(['1', 1], convert=True)
174
- <StringArray>
175
- ['1', '1']
176
- Length: 2, dtype: string
165
+ However, instantiating StringArrays directly with non-strings will raise an error.
177
166
178
167
For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:
179
168
@@ -186,29 +175,22 @@ class StringArray(PandasArray):
186
175
# undo the PandasArray hack
187
176
_typ = "extension"
188
177
189
def __init__(self, values, copy=False):
    """
    Build the array from ``values`` and validate what ends up stored.

    Parameters
    ----------
    values : array-like
        Run through ``extract_array`` before being handed to the parent
        constructor.
    copy : bool, default False
        Whether to copy the array of data.

    Raises
    ------
    ValueError
        Propagated from ``_validate`` when the stored data is not a
        sequence of strings or pandas.NA.
    """
    values = extract_array(values)
    # Input that is already an instance of this class is not re-validated.
    needs_validation = not isinstance(values, type(self))

    super().__init__(values, copy=copy)
    self._dtype = StringDtype()

    # Validation runs last because it inspects the data stored on the
    # instance rather than the raw argument.
    if needs_validation:
        self._validate()
200
185
201
def _validate(self):
    """
    Check that the stored data holds only strings or NA.

    Raises
    ------
    ValueError
        If the stored array is non-empty and fails
        ``lib.is_string_array(..., skipna=True)``, or if its dtype is not
        ``"object"``.
    """
    stored = self._ndarray

    # An empty array skips the element check; only its dtype is inspected.
    if len(stored):
        if not lib.is_string_array(stored, skipna=True):
            raise ValueError(
                "StringArray requires a sequence of strings or pandas.NA"
            )

    if stored.dtype != "object":
        raise ValueError(
            "StringArray requires a sequence of strings or pandas.NA. Got "
            f"'{stored.dtype}' dtype instead."
        )
213
195
214
196
@classmethod
@@ -217,8 +199,18 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
217
199
assert dtype == "string"
218
200
219
201
result = np .asarray (scalars , dtype = "object" )
220
-
221
- return cls (result , copy = copy , convert = True )
202
+ # convert non-na-likes to str, and nan-likes to StringDtype.na_value
203
+ result = lib .ensure_string_array (
204
+ result , na_value = StringDtype .na_value , copy = copy
205
+ )
206
+
207
+ # Manually creating new array avoids the validation step in the __init__, so is
208
+ # faster. Refactor need for validation?
209
+ new_string_array = object .__new__ (cls )
210
+ new_string_array ._dtype = StringDtype ()
211
+ new_string_array ._ndarray = result
212
+
213
+ return new_string_array
222
214
223
215
@classmethod
224
216
def _from_sequence_of_strings (cls , strings , dtype = None , copy = False ):
0 commit comments