File tree 3 files changed +17
-0
lines changed 3 files changed +17
-0
lines changed Original file line number Diff line number Diff line change @@ -1920,6 +1920,13 @@ def as_column(
1920
1920
return col
1921
1921
1922
1922
elif isinstance (arbitrary , (pa .Array , pa .ChunkedArray )):
1923
+ if pa .types .is_large_string (arbitrary .type ):
1924
+ # Pandas 2.2+ defaults to the `large_string` type
1925
+ # instead of `string` when it does not introspect the data.
1926
+ # Temporary workaround until cudf has native
1927
+ # support for `LARGE_STRING`, i.e., 64-bit offsets.
1928
+ arbitrary = arbitrary .cast (pa .string ())
1929
+
1923
1930
if pa .types .is_float16 (arbitrary .type ):
1924
1931
raise NotImplementedError (
1925
1932
"Type casting from `float16` to `float32` is not "
Original file line number Diff line number Diff line change @@ -2700,3 +2700,11 @@ def test_series_dtype_astypes(data):
2700
2700
result = cudf .Series (data , dtype = "float64" )
2701
2701
expected = cudf .Series ([1.0 , 2.0 , 3.0 ])
2702
2702
assert_eq (result , expected )
2703
+
2704
+
2705
def test_series_from_large_string():
    """Check that a Series built from a pyarrow ``large_string`` array
    compares equal between cudf and pandas.

    The input is a three-element string array cast to ``pa.large_string()``;
    the same array object is handed to both constructors.
    """
    large_strings = pa.array(["a", "b", "c"]).cast(pa.large_string())

    # Build the cudf result first, then the pandas reference.
    got = cudf.Series(large_strings)
    expected = pd.Series(large_strings)

    assert_eq(expected, got)
Original file line number Diff line number Diff line change @@ -213,6 +213,8 @@ def cudf_dtype_from_pa_type(typ):
213
213
return cudf .core .dtypes .StructDtype .from_arrow (typ )
214
214
elif pa .types .is_decimal (typ ):
215
215
return cudf .core .dtypes .Decimal128Dtype .from_arrow (typ )
216
+ elif pa .types .is_large_string (typ ):
217
+ return cudf .dtype ("str" )
216
218
else :
217
219
return cudf .api .types .pandas_dtype (typ .to_pandas_dtype ())
218
220
You can’t perform that action at this time.
0 commit comments