Skip to content

Commit c15d823

Browse files
ENH (string dtype): accept string_view in addition to string/large_string for ArrowStringArray input (#60222)
1 parent 4b04a2f commit c15d823

File tree

2 files changed

+21
-0
lines changed

2 files changed

+21
-0
lines changed

pandas/core/arrays/string_arrow.py

+7
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas.compat import (
1818
pa_version_under10p1,
1919
pa_version_under13p0,
20+
pa_version_under16p0,
2021
)
2122
from pandas.util._exceptions import find_stack_level
2223

@@ -71,6 +72,10 @@ def _chk_pyarrow_available() -> None:
7172
raise ImportError(msg)
7273

7374

75+
def _is_string_view(typ):
    """
    Return whether ``typ`` is reported by pyarrow as a ``string_view`` type.

    ``pa_version_under16p0`` is checked first; when that flag is set the
    function returns False without calling ``pa.types.is_string_view``.
    """
    if pa_version_under16p0:
        return False
    return pa.types.is_string_view(typ)
77+
78+
7479
# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
7580
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
7681
# fallback for the ones that pyarrow doesn't yet support
@@ -128,11 +133,13 @@ def __init__(self, values) -> None:
128133
_chk_pyarrow_available()
129134
if isinstance(values, (pa.Array, pa.ChunkedArray)) and (
130135
pa.types.is_string(values.type)
136+
or _is_string_view(values.type)
131137
or (
132138
pa.types.is_dictionary(values.type)
133139
and (
134140
pa.types.is_string(values.type.value_type)
135141
or pa.types.is_large_string(values.type.value_type)
142+
or _is_string_view(values.type.value_type)
136143
)
137144
)
138145
):

pandas/tests/arrays/string_/test_string_arrow.py

+14
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,20 @@ def test_constructor_valid_string_type_value_dictionary(string_type, chunked):
9999
assert pa.types.is_large_string(arr._pa_array.type)
100100

101101

102+
@pytest.mark.parametrize("chunked", [True, False])
def test_constructor_valid_string_view(chunked):
    # casting string_view to string requires pyarrow>=18
    pa = pytest.importorskip("pyarrow", minversion="18")

    values = pa.array(["1", "2", "3"], pa.string_view())
    # exercise both the plain Array and the ChunkedArray input paths
    pa_input = pa.chunked_array(values) if chunked else values

    result = ArrowStringArray(pa_input)
    # string_view input is expected to be stored as a large string array
    assert pa.types.is_large_string(result._pa_array.type)
114+
115+
102116
def test_constructor_from_list():
103117
# GH#27673
104118
pytest.importorskip("pyarrow")

0 commit comments

Comments
 (0)