Skip to content

Commit 7e92ca6

Browse files
authored
BUG: Fix regression when using Series with arrow string array (#52076)
* BUG: Fix regression when using Series with arrow string array
* Move
1 parent 5c15588 commit 7e92ca6

File tree

4 files changed

+13
-1
lines changed

4 files changed

+13
-1
lines changed

pandas/_libs/lib.pyx

-1
Original file line number | Diff line number | Diff line change
@@ -752,7 +752,6 @@ cpdef ndarray[object] ensure_string_array(
752752
out = arr.astype(str).astype(object)
753753
out[arr.isna()] = na_value
754754
return out
755-
756755
arr = arr.to_numpy()
757756
elif not util.is_array(arr):
758757
arr = np.array(arr, dtype="object")

pandas/core/arrays/string_.py

+3
Original file line number | Diff line number | Diff line change
@@ -352,6 +352,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
352352
result[na_values] = libmissing.NA
353353

354354
else:
355+
if hasattr(scalars, "type"):
356+
# pyarrow array
357+
scalars = np.array(scalars)
355358
# convert non-na-likes to str, and nan-likes to StringDtype().na_value
356359
result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy)
357360

pandas/core/arrays/string_arrow.py

+2
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,8 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
151151
result = scalars._data
152152
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
153153
return cls(pa.array(result, mask=na_values, type=pa.string()))
154+
elif isinstance(scalars, (pa.Array, pa.ChunkedArray)):
155+
return cls(pc.cast(scalars, pa.string()))
154156

155157
# convert non-na-likes to str
156158
result = lib.ensure_string_array(scalars, copy=copy)

pandas/tests/extension/test_arrow.py

+8
Original file line number | Diff line number | Diff line change
@@ -2353,6 +2353,14 @@ def test_concat_empty_arrow_backed_series(dtype):
23532353
tm.assert_series_equal(result, expected)
23542354

23552355

2356+
@pytest.mark.parametrize("dtype", ["string", "string[pyarrow]"])
2357+
def test_series_from_string_array(dtype):
2358+
arr = pa.array("the quick brown fox".split())
2359+
ser = pd.Series(arr, dtype=dtype)
2360+
expected = pd.Series(ArrowExtensionArray(arr), dtype=dtype)
2361+
tm.assert_series_equal(ser, expected)
2362+
2363+
23562364
# _data was renamed to _pa_data
23572365
class OldArrowExtensionArray(ArrowExtensionArray):
23582366
def __getstate__(self):

0 commit comments

Comments
 (0)