diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 8d3a8feb89d67..50e8cc4c82e0d 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -829,6 +829,7 @@ def astype(self, dtype, copy: bool = True): """ from pandas import Index from pandas.core.arrays.string_ import StringDtype + from pandas.core.arrays.string_arrow import ArrowStringDtype if dtype is not None: dtype = pandas_dtype(dtype) @@ -851,7 +852,7 @@ def astype(self, dtype, copy: bool = True): return self._shallow_copy(new_left, new_right) elif is_categorical_dtype(dtype): return Categorical(np.asarray(self), dtype=dtype) - elif isinstance(dtype, StringDtype): + elif isinstance(dtype, (StringDtype, ArrowStringDtype)): return dtype.construct_array_type()._from_sequence(self, copy=False) # TODO: This try/except will be repeated. diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 180ed51e7fd2b..1692afbf1fc84 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -228,10 +228,21 @@ def _chk_pyarrow_available(cls) -> None: @classmethod def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False): + from pandas.core.arrays.masked import BaseMaskedArray + cls._chk_pyarrow_available() - # convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value - scalars = lib.ensure_string_array(scalars, copy=False) - return cls(pa.array(scalars, type=pa.string(), from_pandas=True)) + + if isinstance(scalars, BaseMaskedArray): + # avoid costly conversion to object dtype in ensure_string_array and + # numerical issues with Float32Dtype + na_values = scalars._mask + result = scalars._data + result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) + return cls(pa.array(result, mask=na_values, type=pa.string())) + + # convert non-na-likes to str + result = lib.ensure_string_array(scalars, copy=copy) + return cls(pa.array(result, type=pa.string(), from_pandas=True)) @classmethod def _from_sequence_of_strings( diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 7c5ef5b3b27d3..47f4f7585243d 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -43,10 +43,10 @@ def test_astype_str(self, data): expected = pd.Series([str(x) for x in data[:5]], dtype=str) self.assert_series_equal(result, expected) - def test_astype_string(self, data): + def test_astype_string(self, data, nullable_string_dtype): # GH-33465 - result = pd.Series(data[:5]).astype("string") - expected = pd.Series([str(x) for x in data[:5]], dtype="string") + result = pd.Series(data[:5]).astype(nullable_string_dtype) + expected = pd.Series([str(x) for x in data[:5]], dtype=nullable_string_dtype) self.assert_series_equal(result, expected) def test_to_numpy(self, data):