26
26
from pandas .tests .extension import base
27
27
28
28
29
def split_array(arr):
    """
    Return a copy of ``arr`` whose backing data is split into two chunks.

    The current test is skipped when ``arr.dtype`` is not an
    ``ArrowStringDtype``. Otherwise the array's ``_data`` is cut at its
    midpoint and the chunks of both halves are reassembled into a new
    pyarrow chunked array, which is wrapped in the same array type as ``arr``.
    """
    if not isinstance(arr.dtype, ArrowStringDtype):
        pytest.skip("chunked array n/a")

    # Imported only after the dtype check, so the skip path never needs pyarrow.
    import pyarrow as pa

    backing = arr._data
    midpoint = len(backing) // 2
    head = backing[:midpoint]
    tail = backing[midpoint:]
    rechunked = pa.chunked_array([*head.chunks, *tail.chunks])
    # NOTE(review): presumably holds because the input has a single chunk and
    # both halves are non-empty -- confirm for very short inputs.
    assert rechunked.num_chunks == 2
    return type(arr)(rechunked)
45
+
46
+
47
@pytest.fixture(params=[True, False])
def chunked(request):
    """Boolean fixture: parametrized over ``True`` and ``False``."""
    return request.param
50
+
51
+
29
52
@pytest .fixture (
30
53
params = [
31
54
StringDtype ,
@@ -39,28 +62,32 @@ def dtype(request):
39
62
40
63
41
64
@pytest.fixture
def data(dtype, chunked):
    """
    Length-100 array of random ASCII letters whose first two values differ.

    When ``chunked`` is true the array is passed through ``split_array``
    before being returned.
    """
    letters = list(string.ascii_letters)
    # Redraw until the first two elements are distinct.
    while True:
        strings = np.random.choice(letters, size=100)
        if strings[0] != strings[1]:
            break

    array_type = dtype.construct_array_type()
    values = array_type._from_sequence(strings)
    if chunked:
        return split_array(values)
    return values
48
72
49
73
50
74
@pytest.fixture
def data_missing(dtype, chunked):
    """
    Two-element array laid out as [NA, valid].

    When ``chunked`` is true the array is passed through ``split_array``
    before being returned.
    """
    array_type = dtype.construct_array_type()
    values = array_type._from_sequence([pd.NA, "A"])
    if chunked:
        return split_array(values)
    return values
54
79
55
80
56
81
@pytest.fixture
def data_for_sorting(dtype, chunked):
    """
    Three-element array built from ["B", "C", "A"].

    When ``chunked`` is true the array is passed through ``split_array``
    before being returned.
    """
    array_type = dtype.construct_array_type()
    values = array_type._from_sequence(["B", "C", "A"])
    if chunked:
        return split_array(values)
    return values
59
85
60
86
61
87
@pytest.fixture
def data_missing_for_sorting(dtype, chunked):
    """
    Three-element array built from ["B", NA, "A"].

    When ``chunked`` is true the array is passed through ``split_array``
    before being returned.
    """
    array_type = dtype.construct_array_type()
    values = array_type._from_sequence(["B", pd.NA, "A"])
    if chunked:
        return split_array(values)
    return values
64
91
65
92
66
93
@pytest .fixture
@@ -69,10 +96,11 @@ def na_value():
69
96
70
97
71
98
@pytest.fixture
def data_for_grouping(dtype, chunked):
    """
    Eight-element array built from ["B", "B", NA, NA, "A", "A", "B", "C"].

    When ``chunked`` is true the array is passed through ``split_array``
    before being returned.
    """
    array_type = dtype.construct_array_type()
    values = array_type._from_sequence(
        ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]
    )
    if chunked:
        return split_array(values)
    return values
76
104
77
105
78
106
class TestDtype (base .BaseDtypeTests ):
0 commit comments