Skip to content

Commit 850d3d4

Browse files
phoflpmhatre1
authored and committed
BUG: Use large_string in string array consistently (pandas-dev#58590)
1 parent e07e5d6 commit 850d3d4

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

pandas/core/arrays/string_arrow.py

+10 -6
Original file line number | Diff line number | Diff line change
@@ -196,13 +196,13 @@ def _from_sequence(
196 196
na_values = scalars._mask
197 197
result = scalars._data
198 198
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
199 -
return cls(pa.array(result, mask=na_values, type=pa.string()))
199 +
return cls(pa.array(result, mask=na_values, type=pa.large_string()))
200 200
elif isinstance(scalars, (pa.Array, pa.ChunkedArray)):
201 -
return cls(pc.cast(scalars, pa.string()))
201 +
return cls(pc.cast(scalars, pa.large_string()))
202 202

203 203
# convert non-na-likes to str
204 204
result = lib.ensure_string_array(scalars, copy=copy)
205 -
return cls(pa.array(result, type=pa.string(), from_pandas=True))
205 +
return cls(pa.array(result, type=pa.large_string(), from_pandas=True))
206 206

207 207
@classmethod
208 208
def _from_sequence_of_strings(
@@ -245,7 +245,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
245 245
value_set = [
246 246
pa_scalar.as_py()
247 247
for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values]
248 -
if pa_scalar.type in (pa.string(), pa.null())
248 +
if pa_scalar.type in (pa.string(), pa.null(), pa.large_string())
249 249
]
250 250

251 251
# short-circuit to return all False array.
@@ -332,7 +332,9 @@ def _str_map(
332 332
result = lib.map_infer_mask(
333 333
arr, f, mask.view("uint8"), convert=False, na_value=na_value
334 334
)
335 -
result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
335 +
result = pa.array(
336 +
result, mask=mask, type=pa.large_string(), from_pandas=True
337 +
)
336 338
return type(self)(result)
337 339
else:
338 340
# This is when the result type is object. We reach this when
@@ -655,7 +657,9 @@ def _str_map(
655 657
result = lib.map_infer_mask(
656 658
arr, f, mask.view("uint8"), convert=False, na_value=na_value
657 659
)
658 -
result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
660 +
result = pa.array(
661 +
result, mask=mask, type=pa.large_string(), from_pandas=True
662 +
)
659 663
return type(self)(result)
660 664
else:
661 665
# This is when the result type is object. We reach this when

0 commit comments

Comments
 (0)