Skip to content

Commit ea91afb

Browse files
committed
API: more permissive conversion to StringDtype
1 parent e36b92c commit ea91afb

File tree

5 files changed

+44
-5
lines changed

5 files changed

+44
-5
lines changed

pandas/core/arrays/base.py

+23
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,29 @@ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
213213
"""
214214
raise AbstractMethodError(cls)
215215

216+
@classmethod
def _from_sequence_of_any_type(cls, scalars, dtype=None, copy=False):
    """
    Build a new ExtensionArray from scalars whose types are not known.

    This default implementation does no conversion of its own: all
    arguments are handed unchanged to ``cls._from_sequence``.

    .. versionadded:: 1.1.0

    Parameters
    ----------
    scalars : Sequence
        Sequence whose elements may be instances of unknown scalar types.
    dtype : dtype, optional
        The particular dtype to construct for. This should be a Dtype
        compatible with the ExtensionArray.
    copy : bool, default False
        If True, copy the underlying data.

    Returns
    -------
    ExtensionArray
    """
    # Plain delegation to the regular sequence constructor.
    build = cls._from_sequence
    return build(scalars, dtype=dtype, copy=copy)
238+
216239
@classmethod
217240
def _from_factorized(cls, values, original):
218241
"""

pandas/core/arrays/integer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numbers
2-
from typing import TYPE_CHECKING, Tuple, Type, Union
2+
from typing import TYPE_CHECKING, Dict, Tuple, Type, Union
33
import warnings
44

55
import numpy as np
@@ -764,7 +764,7 @@ class UInt64Dtype(_IntegerDtype):
764764
__doc__ = _dtype_docstring.format(dtype="uint64")
765765

766766

767-
_dtypes = {
767+
_dtypes: Dict[str, _IntegerDtype] = {
768768
"int8": Int8Dtype(),
769769
"int16": Int16Dtype(),
770770
"int32": Int32Dtype(),

pandas/core/arrays/string_.py

+15
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,21 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
215215
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
216216
return cls._from_sequence(strings, dtype=dtype, copy=copy)
217217

218+
@classmethod
def _from_sequence_of_any_type(cls, scalars, dtype=None, copy=False):
    """
    Construct the array from scalars of arbitrary type by stringifying them.

    Elements flagged by ``isna`` are replaced with the missing-value
    marker; every other element is converted through NumPy's ``str``
    casting. The result is then passed to ``cls._from_sequence``.

    Parameters
    ----------
    scalars : Sequence
        Each element can be an instance of any scalar type.
    dtype : dtype, optional
        Forwarded to ``cls._from_sequence``. Its ``na_value`` attribute is
        used as the missing-value marker; when ``dtype`` is None or has no
        such attribute (e.g. a string alias), ``pandas.NA`` is used.
    copy : bool, default False
        Forwarded to ``cls._from_sequence``. ``scalars`` itself is never
        modified, whatever the value of ``copy``.

    Returns
    -------
    The object returned by ``cls._from_sequence``.
    """
    # Imported locally so the fallback marker is available without relying
    # on module-level names outside this method.
    from pandas import NA

    values = np.asarray(scalars, dtype="object")
    na_mask = isna(values)

    # Convert to str, then back to object to avoid a dtype like '<U3'.
    # Casting object -> str always allocates a new array, so the caller's
    # data cannot be mutated below and no explicit defensive copy is needed.
    values = np.asarray(values, dtype=str)
    values = np.asarray(values, dtype="object")

    if na_mask.any():
        # ``dtype`` defaults to None, so do not assume it carries na_value.
        values[na_mask] = getattr(dtype, "na_value", NA)

    return cls._from_sequence(values, dtype=dtype, copy=copy)
232+
218233
def __arrow_array__(self, type=None):
219234
"""
220235
Convert myself into a pyarrow Array.

pandas/core/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ def array(
283283

284284
if is_extension_array_dtype(dtype):
285285
cls = cast(ExtensionDtype, dtype).construct_array_type()
286-
return cls._from_sequence(data, dtype=dtype, copy=copy)
286+
return cls._from_sequence_of_any_type(data, dtype=dtype, copy=copy)
287287

288288
if dtype is None:
289289
inferred_dtype = lib.infer_dtype(data, skipna=True)
@@ -562,7 +562,7 @@ def _try_cast(
562562
elif is_extension_array_dtype(dtype):
563563
# create an extension array from its dtype
564564
dtype = cast(ExtensionDtype, dtype)
565-
array_type = dtype.construct_array_type()._from_sequence
565+
array_type = dtype.construct_array_type()._from_sequence_of_any_type
566566
subarr = array_type(arr, dtype=dtype, copy=copy)
567567
elif dtype is not None and raise_cast_failure:
568568
raise

pandas/core/dtypes/cast.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,8 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False):
924924
"""
925925
# dispatch on extension dtype if needed
926926
if is_extension_array_dtype(dtype):
927-
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
927+
arr_type = dtype.construct_array_type()
928+
return arr_type._from_sequence_of_any_type(arr, dtype=dtype, copy=copy)
928929

929930
if not isinstance(dtype, np.dtype):
930931
dtype = pandas_dtype(dtype)

0 commit comments

Comments
 (0)