     time,
     timedelta,
 )
+from io import (
+    BytesIO,
+    StringIO,
+)

 import numpy as np
 import pytest
@@ -90,6 +94,10 @@ def data(dtype):
             + [None]
             + [time(0, 5), time(5, 0)]
         )
+    elif pa.types.is_string(pa_dtype):
+        data = ["a", "b"] * 4 + [None] + ["1", "2"] * 44 + [None] + ["!", ">"]
+    elif pa.types.is_binary(pa_dtype):
+        data = [b"a", b"b"] * 4 + [None] + [b"1", b"2"] * 44 + [None] + [b"!", b">"]
     else:
         raise NotImplementedError
     return pd.array(data, dtype=dtype)
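Note (illustrative, not part of this diff): a minimal sketch of what the new string branch of the data fixture produces, assuming a pandas version where pd.ArrowDtype is available (1.5+) and pyarrow is installed. The list built above is 100 elements long with exactly two missing values.

import pandas as pd
import pyarrow as pa

# Hypothetical standalone construction mirroring the string branch above.
dtype = pd.ArrowDtype(pa.string())
data = ["a", "b"] * 4 + [None] + ["1", "2"] * 44 + [None] + ["!", ">"]
arr = pd.array(data, dtype=dtype)
print(len(arr), arr.isna().sum())  # 100 2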
@@ -155,6 +163,14 @@ def data_for_grouping(dtype):
         A = time(0, 0)
         B = time(0, 12)
         C = time(12, 12)
+    elif pa.types.is_string(pa_dtype):
+        A = "a"
+        B = "b"
+        C = "c"
+    elif pa.types.is_binary(pa_dtype):
+        A = b"a"
+        B = b"b"
+        C = b"c"
     else:
         raise NotImplementedError
     return pd.array([B, B, None, None, A, A, B, C], dtype=dtype)
@@ -203,17 +219,30 @@ def na_value():


 class TestBaseCasting(base.BaseCastingTests):
-    pass
+    def test_astype_str(self, data, request):
+        pa_dtype = data.dtype.pyarrow_dtype
+        if pa.types.is_binary(pa_dtype):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason=f"For {pa_dtype} .astype(str) decodes.",
+                )
+            )
+        super().test_astype_str(data)


 class TestConstructors(base.BaseConstructorsTests):
     def test_from_dtype(self, data, request):
         pa_dtype = data.dtype.pyarrow_dtype
-        if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz:
+        if (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz) or pa.types.is_string(
+            pa_dtype
+        ):
+            if pa.types.is_string(pa_dtype):
+                reason = "ArrowDtype(pa.string()) != StringDtype('pyarrow')"
+            else:
+                reason = f"pyarrow.type_for_alias cannot infer {pa_dtype}"
             request.node.add_marker(
                 pytest.mark.xfail(
-                    raises=NotImplementedError,
-                    reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
+                    reason=reason,
                 )
             )
         super().test_from_dtype(data)
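Note (illustrative, not part of this diff): several of the xfail reasons in this file hinge on ArrowDtype(pa.string()) being a distinct dtype from the pre-existing StringDtype("pyarrow"), even though both are backed by pyarrow string data. A minimal sketch, assuming pandas 1.5+ with pyarrow installed:

import pandas as pd
import pyarrow as pa

arrow_string = pd.ArrowDtype(pa.string())  # dtype exercised by these tests
legacy_string = pd.StringDtype("pyarrow")  # pre-existing "string[pyarrow]" dtype

# The two dtype objects are not equal, so dtype aliases and inference can resolve
# to the legacy StringDtype instead of ArrowDtype, hence the xfails in this file.
print(arrow_string == legacy_string)  # False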
@@ -302,7 +331,7 @@ class TestGetitemTests(base.BaseGetitemTests):
         reason=(
             "data.dtype.type return pyarrow.DataType "
             "but this (intentionally) returns "
-            "Python scalars or pd.Na "
+            "Python scalars or pd.NA "
         )
     )
     def test_getitem_scalar(self, data):
@@ -361,7 +390,11 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
             or pa.types.is_boolean(pa_dtype)
         ) and not (
             all_numeric_reductions in {"min", "max"}
-            and (pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype))
+            and (
+                (pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype))
+                or pa.types.is_string(pa_dtype)
+                or pa.types.is_binary(pa_dtype)
+            )
         ):
             request.node.add_marker(xfail_mark)
         elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in {
@@ -494,6 +527,16 @@ def test_construct_from_string_own_name(self, dtype, request):
                     reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
                 )
             )
+        elif pa.types.is_string(pa_dtype):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    raises=TypeError,
+                    reason=(
+                        "Still support StringDtype('pyarrow') "
+                        "over ArrowDtype(pa.string())"
+                    ),
+                )
+            )
         super().test_construct_from_string_own_name(dtype)

     def test_is_dtype_from_name(self, dtype, request):
@@ -505,6 +548,15 @@ def test_is_dtype_from_name(self, dtype, request):
                     reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
                 )
             )
+        elif pa.types.is_string(pa_dtype):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason=(
+                        "Still support StringDtype('pyarrow') "
+                        "over ArrowDtype(pa.string())"
+                    ),
+                )
+            )
         super().test_is_dtype_from_name(dtype)

     def test_construct_from_string(self, dtype, request):
@@ -516,6 +568,16 @@ def test_construct_from_string(self, dtype, request):
                     reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
                 )
             )
+        elif pa.types.is_string(pa_dtype):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    raises=TypeError,
+                    reason=(
+                        "Still support StringDtype('pyarrow') "
+                        "over ArrowDtype(pa.string())"
+                    ),
+                )
+            )
         super().test_construct_from_string(dtype)

     def test_construct_from_string_another_type_raises(self, dtype):
@@ -533,6 +595,8 @@ def test_get_common_dtype(self, dtype, request):
             and (pa_dtype.unit != "ns" or pa_dtype.tz is not None)
         )
         or (pa.types.is_duration(pa_dtype) and pa_dtype.unit != "ns")
+        or pa.types.is_string(pa_dtype)
+        or pa.types.is_binary(pa_dtype)
     ):
         request.node.add_marker(
             pytest.mark.xfail(
@@ -592,7 +656,21 @@ def test_EA_types(self, engine, data, request):
                     reason=f"Parameterized types with tz={pa_dtype.tz} not supported.",
                 )
             )
-        super().test_EA_types(engine, data)
+        elif pa.types.is_binary(pa_dtype):
+            request.node.add_marker(
+                pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")
+            )
+        df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
+        csv_output = df.to_csv(index=False, na_rep=np.nan)
+        if pa.types.is_binary(pa_dtype):
+            csv_output = BytesIO(csv_output)
+        else:
+            csv_output = StringIO(csv_output)
+        result = pd.read_csv(
+            csv_output, dtype={"with_dtype": str(data.dtype)}, engine=engine
+        )
+        expected = df
+        self.assert_frame_equal(result, expected)


 class TestBaseUnaryOps(base.BaseUnaryOpsTests):
@@ -899,7 +977,11 @@ def test_arith_series_with_scalar(
             or all_arithmetic_operators in ("__sub__", "__rsub__")
             and pa.types.is_temporal(pa_dtype)
         )
-        if all_arithmetic_operators in {
+        if all_arithmetic_operators == "__rmod__" and (
+            pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype)
+        ):
+            pytest.skip("Skip testing Python string formatting")
+        elif all_arithmetic_operators in {
             "__mod__",
             "__rmod__",
         }:
@@ -965,7 +1047,11 @@ def test_arith_frame_with_scalar(
             or all_arithmetic_operators in ("__sub__", "__rsub__")
             and pa.types.is_temporal(pa_dtype)
         )
-        if all_arithmetic_operators in {
+        if all_arithmetic_operators == "__rmod__" and (
+            pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype)
+        ):
+            pytest.skip("Skip testing Python string formatting")
+        elif all_arithmetic_operators in {
             "__mod__",
             "__rmod__",
         }:
@@ -1224,7 +1310,11 @@ def test_quantile(data, interpolation, quantile, request):
 )
 def test_mode(data_for_grouping, dropna, take_idx, exp_idx, request):
     pa_dtype = data_for_grouping.dtype.pyarrow_dtype
-    if pa.types.is_temporal(pa_dtype):
+    if (
+        pa.types.is_temporal(pa_dtype)
+        or pa.types.is_string(pa_dtype)
+        or pa.types.is_binary(pa_dtype)
+    ):
         request.node.add_marker(
             pytest.mark.xfail(
                 raises=pa.ArrowNotImplementedError,
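Note (illustrative, not part of this diff): the raises=pa.ArrowNotImplementedError marker above reflects that, at the time of this change, pyarrow's "mode" compute kernel apparently had no implementation for string or binary inputs; newer pyarrow releases may behave differently. A rough sketch of the underlying failure:

import pyarrow as pa
import pyarrow.compute as pc

# Calling the mode kernel directly on string data; with the pyarrow versions this
# change targets, no matching kernel exists and ArrowNotImplementedError is raised.
strings = pa.array(["b", "b", "a", None])
try:
    print(pc.mode(strings))
except pa.ArrowNotImplementedError as err:
    print("mode kernel unavailable for string:", err)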