diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a9fcf6b28953b..88ea61a23a426 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -752,7 +752,6 @@ cpdef ndarray[object] ensure_string_array( out = arr.astype(str).astype(object) out[arr.isna()] = na_value return out - arr = arr.to_numpy() elif not util.is_array(arr): arr = np.array(arr, dtype="object") diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 89f44de3386c7..4bd95da2b6b07 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -352,6 +352,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal result[na_values] = libmissing.NA else: + if hasattr(scalars, "type"): + # pyarrow array + scalars = np.array(scalars) # convert non-na-likes to str, and nan-likes to StringDtype().na_value result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index d5c1f98e63f14..19d040618ec99 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -151,6 +151,8 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False) result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) return cls(pa.array(result, mask=na_values, type=pa.string())) + elif isinstance(scalars, (pa.Array, pa.ChunkedArray)): + return cls(pc.cast(scalars, pa.string())) # convert non-na-likes to str result = lib.ensure_string_array(scalars, copy=copy) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 517626f8c2abb..1cdff54cd6df6 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2353,6 +2353,14 @@ def test_concat_empty_arrow_backed_series(dtype): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("dtype", ["string", "string[pyarrow]"]) +def test_series_from_string_array(dtype): + arr = pa.array("the quick brown fox".split()) + ser = pd.Series(arr, dtype=dtype) + expected = pd.Series(ArrowExtensionArray(arr), dtype=dtype) + tm.assert_series_equal(ser, expected) + + # _data was renamed to _pa_data class OldArrowExtensionArray(ArrowExtensionArray): def __getstate__(self):