Skip to content

Commit 22e7c08

Browse files
authored
Backport PR #52076 on branch 2.0.x (BUG: Fix regression when using Series with arrow string array) (#52121)
BUG: Fix regression when using Series with arrow string array (#52076) * BUG: Fix regression when using Series with arrow string array * Move
1 parent d211998 commit 22e7c08

File tree

4 files changed

+13
-1
lines changed

4 files changed

+13
-1
lines changed

pandas/_libs/lib.pyx

-1
Original file line number | Diff line number | Diff line change
@@ -750,7 +750,6 @@ cpdef ndarray[object] ensure_string_array(
750750
out = arr.astype(str).astype(object)
751751
out[arr.isna()] = na_value
752752
return out
753-
754753
arr = arr.to_numpy()
755754
elif not util.is_array(arr):
756755
arr = np.array(arr, dtype="object")

pandas/core/arrays/string_.py

+3
Original file line number | Diff line number | Diff line change
@@ -351,6 +351,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
351351
result[na_values] = libmissing.NA
352352

353353
else:
354+
if hasattr(scalars, "type"):
355+
# pyarrow array
356+
scalars = np.array(scalars)
354357
# convert non-na-likes to str, and nan-likes to StringDtype().na_value
355358
result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy)
356359

pandas/core/arrays/string_arrow.py

+2
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,8 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
144144
result = scalars._data
145145
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
146146
return cls(pa.array(result, mask=na_values, type=pa.string()))
147+
elif isinstance(scalars, (pa.Array, pa.ChunkedArray)):
148+
return cls(pc.cast(scalars, pa.string()))
147149

148150
# convert non-na-likes to str
149151
result = lib.ensure_string_array(scalars, copy=copy)

pandas/tests/extension/test_arrow.py

+8
Original file line number | Diff line number | Diff line change
@@ -2324,3 +2324,11 @@ def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
23242324
else:
23252325
expected = pd.NA
23262326
assert result is expected
2327+
2328+
2329+
@pytest.mark.parametrize("dtype", ["string", "string[pyarrow]"])
2330+
def test_series_from_string_array(dtype):
2331+
arr = pa.array("the quick brown fox".split())
2332+
ser = pd.Series(arr, dtype=dtype)
2333+
expected = pd.Series(ArrowExtensionArray(arr), dtype=dtype)
2334+
tm.assert_series_equal(ser, expected)

0 commit comments

Comments
 (0)