Skip to content

Commit 864060f

Browse files
simonjayhawkins authored and yeshsurya committed
[ArrowStringArray] BUG: fix test_astype_string for Float32Dtype (pandas-dev#40998)
1 parent cba6b66 commit 864060f

File tree

3 files changed

+19
-7
lines changed

3 files changed

+19
-7
lines changed

pandas/core/arrays/interval.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -829,6 +829,7 @@ def astype(self, dtype, copy: bool = True):
829829
"""
830830
from pandas import Index
831831
from pandas.core.arrays.string_ import StringDtype
832+
from pandas.core.arrays.string_arrow import ArrowStringDtype
832833

833834
if dtype is not None:
834835
dtype = pandas_dtype(dtype)
@@ -851,7 +852,7 @@ def astype(self, dtype, copy: bool = True):
851852
return self._shallow_copy(new_left, new_right)
852853
elif is_categorical_dtype(dtype):
853854
return Categorical(np.asarray(self), dtype=dtype)
854-
elif isinstance(dtype, StringDtype):
855+
elif isinstance(dtype, (StringDtype, ArrowStringDtype)):
855856
return dtype.construct_array_type()._from_sequence(self, copy=False)
856857

857858
# TODO: This try/except will be repeated.

pandas/core/arrays/string_arrow.py

+14 -3
Original file line number | Diff line number | Diff line change
@@ -229,10 +229,21 @@ def _chk_pyarrow_available(cls) -> None:
229229

230230
@classmethod
231231
def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False):
232+
from pandas.core.arrays.masked import BaseMaskedArray
233+
232234
cls._chk_pyarrow_available()
233-
# convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value
234-
scalars = lib.ensure_string_array(scalars, copy=False)
235-
return cls(pa.array(scalars, type=pa.string(), from_pandas=True))
235+
236+
if isinstance(scalars, BaseMaskedArray):
237+
# avoid costly conversion to object dtype in ensure_string_array and
238+
# numerical issues with Float32Dtype
239+
na_values = scalars._mask
240+
result = scalars._data
241+
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
242+
return cls(pa.array(result, mask=na_values, type=pa.string()))
243+
244+
# convert non-na-likes to str
245+
result = lib.ensure_string_array(scalars, copy=copy)
246+
return cls(pa.array(result, type=pa.string(), from_pandas=True))
236247

237248
@classmethod
238249
def _from_sequence_of_strings(

pandas/tests/extension/base/casting.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,10 @@ def test_astype_str(self, data):
4343
expected = pd.Series([str(x) for x in data[:5]], dtype=str)
4444
self.assert_series_equal(result, expected)
4545

46-
def test_astype_string(self, data):
46+
def test_astype_string(self, data, nullable_string_dtype):
4747
# GH-33465
48-
result = pd.Series(data[:5]).astype("string")
49-
expected = pd.Series([str(x) for x in data[:5]], dtype="string")
48+
result = pd.Series(data[:5]).astype(nullable_string_dtype)
49+
expected = pd.Series([str(x) for x in data[:5]], dtype=nullable_string_dtype)
5050
self.assert_series_equal(result, expected)
5151

5252
def test_to_numpy(self, data):

0 commit comments

Comments
 (0)