Skip to content

Commit 89ef931

Browse files
committed
Change according to comments
1 parent f316e42 commit 89ef931

File tree

6 files changed

+19
-19
lines changed

6 files changed

+19
-19
lines changed

doc/source/whatsnew/v1.1.0.rst

+3-11
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,11 @@ Enhancements
1818
All dtypes can now be converted to ``StringDtype``
1919
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2020

21-
Previously, declaring or converting to :class:`StringDtype` was in general only possible if the data was already only ``str`` or nan-like.
22-
For example:
23-
24-
.. code-block:: ipython
25-
26-
In [1]: pd.Series([1, "abc", np.nan], dtype="string")
27-
Out[1]: ValueError: StringArray requires a sequence of strings or pandas.NA
28-
In [2]: pd.Series([1, 2, np.nan], dtype="Int64").astype("string")
29-
Out[2]: ValueError: StringArray requires a sequence of strings or pandas.NA
30-
31-
This meant that in order to convert arbitrary data to :class:`StringDtype`, you would often have to use ``.astype(str).astype('string')``, which was not intuitive.
21+
Previously, declaring or converting to :class:`StringDtype` was generally only possible if the data already consisted solely of ``str`` or nan-like values (:issue:`31204`).
3222
:class:`StringDtype` now works in all situations where ``astype(str)`` or ``dtype=str`` work:
3323

24+
For example, the following now works:
25+
3426
.. ipython:: python
3527
3628
ser = pd.Series([1, "abc", np.nan], dtype="string")

pandas/core/arrays/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ def astype(self, dtype, copy=True):
454454
from pandas.core.arrays.string_ import StringDtype
455455

456456
dtype = pandas_dtype(dtype)
457-
if isinstance(dtype, StringDtype):
457+
if isinstance(dtype, StringDtype): # allow conversion to StringArrays
458458
return dtype.construct_array_type()._from_sequence(self, copy=False)
459459

460460
return np.array(self, dtype=dtype, copy=copy)

pandas/core/arrays/string_.py

+6
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@ class StringArray(PandasArray):
164164
['1', '1']
165165
Length: 2, dtype: string
166166
167+
On the other hand, instantiating StringArrays directly with non-strings will
168+
raise an error:
169+
>>> pd.arrays.StringArray(np.array([1, 2]))
170+
ValueError: StringArray requires a sequence of strings or pandas.NA
171+
167172
For comparison methods, this returns a :class:`pandas.BooleanArray`
168173
169174
>>> pd.array(["a", None, "c"], dtype="string") == "a"
@@ -211,6 +216,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
211216
if has_nans and result is scalars:
212217
# force a copy now, if we haven't already
213218
result = result.copy()
219+
214220
# convert to str, then to object to avoid dtype like '<U3', then insert na_value
215221
result = np.asarray(result, dtype=str)
216222
result = np.asarray(result, dtype="object")

pandas/core/dtypes/cast.py

+7
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,16 @@ def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None):
337337
-------
338338
ExtensionArray or obj
339339
"""
340+
from pandas.core.arrays.string_ import StringArray
341+
340342
assert isinstance(cls, type), f"must pass a type: {cls}"
341343
assertion_msg = f"must pass a subclass of ExtensionArray: {cls}"
342344
assert issubclass(cls, ABCExtensionArray), assertion_msg
345+
346+
# Everything can be converted to StringArrays, but we may not want to convert
347+
if issubclass(cls, StringArray) and lib.infer_dtype(obj) != "string":
348+
return obj
349+
343350
try:
344351
result = cls._from_sequence(obj, dtype=dtype)
345352
except Exception:

pandas/core/series.py

-4
Original file line numberDiff line numberDiff line change
@@ -2768,10 +2768,6 @@ def combine(self, other, func, fill_value=None) -> "Series":
27682768
if is_categorical_dtype(self.dtype):
27692769
pass
27702770
elif is_extension_array_dtype(self.dtype):
2771-
# Everything can be be converted to strings, but we may not want to convert
2772-
if self.dtype == "string" and lib.infer_dtype(new_values) != "string":
2773-
return self._constructor(new_values, index=new_index, name=new_name)
2774-
27752771
# TODO: can we do this for only SparseDtype?
27762772
# The function can return something of any type, so check
27772773
# if the type is compatible with the calling EA.

pandas/tests/extension/decimal/array.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,8 @@ def astype(self, dtype, copy=True):
136136
dtype = pandas_dtype(dtype)
137137
if isinstance(dtype, type(self.dtype)):
138138
return type(self)(self._data, context=dtype.context)
139-
elif isinstance(dtype, StringDtype):
140-
return dtype.construct_array_type()._from_sequence(self, copy=False)
141-
return np.asarray(self, dtype=dtype)
139+
140+
return super().astype(dtype, copy=copy)
142141

143142
def __setitem__(self, key, value):
144143
if pd.api.types.is_list_like(value):

0 commit comments

Comments
 (0)