@@ -190,13 +190,13 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
190
190
na_values = scalars ._mask
191
191
result = scalars ._data
192
192
result = lib .ensure_string_array (result , copy = copy , convert_na_value = False )
193
- return cls (pa .array (result , mask = na_values , type = pa .string ()))
193
+ return cls (pa .array (result , mask = na_values , type = pa .large_string ()))
194
194
elif isinstance (scalars , (pa .Array , pa .ChunkedArray )):
195
- return cls (pc .cast (scalars , pa .string ()))
195
+ return cls (pc .cast (scalars , pa .large_string ()))
196
196
197
197
# convert non-na-likes to str
198
198
result = lib .ensure_string_array (scalars , copy = copy )
199
- return cls (pa .array (result , type = pa .string (), from_pandas = True ))
199
+ return cls (pa .array (result , type = pa .large_string (), from_pandas = True ))
200
200
201
201
@classmethod
202
202
def _from_sequence_of_strings (
@@ -239,7 +239,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
239
239
value_set = [
240
240
pa_scalar .as_py ()
241
241
for pa_scalar in [pa .scalar (value , from_pandas = True ) for value in values ]
242
- if pa_scalar .type in (pa .string (), pa .null ())
242
+ if pa_scalar .type in (pa .string (), pa .null (), pa . large_string () )
243
243
]
244
244
245
245
# short-circuit to return all False array.
@@ -337,7 +337,9 @@ def _str_map(
337
337
result = lib .map_infer_mask (
338
338
arr , f , mask .view ("uint8" ), convert = False , na_value = na_value
339
339
)
340
- result = pa .array (result , mask = mask , type = pa .string (), from_pandas = True )
340
+ result = pa .array (
341
+ result , mask = mask , type = pa .large_string (), from_pandas = True
342
+ )
341
343
return type (self )(result )
342
344
else :
343
345
# This is when the result type is object. We reach this when
@@ -658,7 +660,9 @@ def _str_map(
658
660
result = lib .map_infer_mask (
659
661
arr , f , mask .view ("uint8" ), convert = False , na_value = na_value
660
662
)
661
- result = pa .array (result , mask = mask , type = pa .string (), from_pandas = True )
663
+ result = pa .array (
664
+ result , mask = mask , type = pa .large_string (), from_pandas = True
665
+ )
662
666
return type (self )(result )
663
667
else :
664
668
# This is when the result type is object. We reach this when
0 commit comments