@@ -153,6 +153,32 @@ def data_for_grouping(dtype):
153
153
return pd .array ([B , B , None , None , A , A , B , C ], dtype = dtype )
154
154
155
155
156
@pytest.fixture
def data_for_sorting(data_for_grouping):
    """
    Length-3 array with a known sort order.

    This should be three items [B, C, A] with
    A < B < C

    The items are picked from positions 0, 7 and 4 of ``data_for_grouping``.
    The source array's dtype is passed through explicitly so the result is
    built with that dtype rather than one re-inferred from the scalars.
    """
    picked = [data_for_grouping[0], data_for_grouping[7], data_for_grouping[4]]
    return type(data_for_grouping)._from_sequence(
        picked, dtype=data_for_grouping.dtype
    )
167
+
168
+
169
@pytest.fixture
def data_missing_for_sorting(data_for_grouping):
    """
    Length-3 array with a known sort order.

    This should be three items [B, NA, A] with
    A < B and NA missing.

    The items are picked from positions 0, 2 and 4 of ``data_for_grouping``.
    The source array's dtype is passed through explicitly so the result is
    built with that dtype rather than one re-inferred from the scalars
    (one of which is the missing value).
    """
    picked = [data_for_grouping[0], data_for_grouping[2], data_for_grouping[4]]
    return type(data_for_grouping)._from_sequence(
        picked, dtype=data_for_grouping.dtype
    )
180
+
181
+
156
182
@pytest .fixture
157
183
def na_value ():
158
184
"""The scalar missing value for this type. Default 'None'"""
@@ -654,7 +680,7 @@ def test_setitem_loc_scalar_single(self, data, using_array_manager, request):
654
680
if pa_version_under2p0 and tz not in (None , "UTC" ):
655
681
request .node .add_marker (
656
682
pytest .mark .xfail (
657
- reason = ( f"Not supported by pyarrow < 2.0 with timestamp type { tz } " )
683
+ reason = f"Not supported by pyarrow < 2.0 with timestamp type { tz } "
658
684
)
659
685
)
660
686
elif using_array_manager and pa .types .is_duration (data .dtype .pyarrow_dtype ):
@@ -988,6 +1014,287 @@ def test_EA_types(self, engine, data, request):
988
1014
super ().test_EA_types (engine , data )
989
1015
990
1016
1017
class TestBaseMethods(base.BaseMethodsTests):
    """
    Run the inherited ``BaseMethodsTests`` tests with per-type xfail markers.

    Every override follows the same shape: read the pyarrow type from the
    fixture data, attach an ``xfail`` marker to the running test node when
    the type/configuration combination matches a listed condition, then
    delegate to the parent implementation with the same arguments.
    """

    @staticmethod
    def _mark_xfail(request, reason, raises=None):
        """Attach an ``xfail`` marker with ``reason`` to the running test node.

        ``raises`` is forwarded to ``pytest.mark.xfail`` only when given, so
        markers created without it are identical to a bare
        ``pytest.mark.xfail(reason=...)``.
        """
        kwargs = {"reason": reason}
        if raises is not None:
            kwargs["raises"] = raises
        request.node.add_marker(pytest.mark.xfail(**kwargs))

    def _xfail_value_counts(self, request, pa_dtype):
        """Apply the xfail conditions shared by both ``value_counts`` tests."""
        is_naive_timestamp = pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None
        if pa.types.is_date(pa_dtype) or is_naive_timestamp:
            self._mark_xfail(request, "GH 34986", raises=AttributeError)
        elif pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request,
                f"value_count has no pyarrow kernel for {pa_dtype}",
                raises=pa.ArrowNotImplementedError,
            )

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna, request):
        self._xfail_value_counts(request, all_data.dtype.pyarrow_dtype)
        super().test_value_counts(all_data, dropna)

    def test_value_counts_with_normalize(self, data, request):
        self._xfail_value_counts(request, data.dtype.pyarrow_dtype)
        super().test_value_counts_with_normalize(data)

    def test_argmin_argmax(
        self, data_for_sorting, data_missing_for_sorting, na_value, request
    ):
        pa_dtype = data_for_sorting.dtype.pyarrow_dtype
        if pa.types.is_boolean(pa_dtype):
            self._mark_xfail(request, f"{pa_dtype} only has 2 unique possible values")
        super().test_argmin_argmax(data_for_sorting, data_missing_for_sorting, na_value)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
        pa_dtype = data_for_sorting.dtype.pyarrow_dtype
        # Only the descending case on pyarrow >= 2.0 is marked.
        if pa.types.is_duration(pa_dtype) and not ascending and not pa_version_under2p0:
            self._mark_xfail(
                request,
                f"unique has no pyarrow kernel "
                f"for {pa_dtype} when ascending={ascending}",
                raises=pa.ArrowNotImplementedError,
            )
        super().test_sort_values(data_for_sorting, ascending, sort_by_key)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_frame(self, data_for_sorting, ascending, request):
        pa_dtype = data_for_sorting.dtype.pyarrow_dtype
        if pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request,
                f"dictionary_encode has no pyarrow kernel "
                f"for {pa_dtype} when ascending={ascending}",
                raises=pa.ArrowNotImplementedError,
            )
        super().test_sort_values_frame(data_for_sorting, ascending)

    @pytest.mark.parametrize("box", [pd.Series, lambda x: x])
    @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
    def test_unique(self, data, box, method, request):
        pa_dtype = data.dtype.pyarrow_dtype
        if pa.types.is_duration(pa_dtype) and not pa_version_under2p0:
            self._mark_xfail(
                request,
                f"unique has no pyarrow kernel for {pa_dtype}.",
                raises=pa.ArrowNotImplementedError,
            )
        super().test_unique(data, box, method)

    @pytest.mark.parametrize("na_sentinel", [-1, -2])
    def test_factorize(self, data_for_grouping, na_sentinel, request):
        pa_dtype = data_for_grouping.dtype.pyarrow_dtype
        if pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request,
                f"dictionary_encode has no pyarrow kernel for {pa_dtype}",
                raises=pa.ArrowNotImplementedError,
            )
        elif pa.types.is_boolean(pa_dtype):
            self._mark_xfail(request, f"{pa_dtype} only has 2 unique possible values")
        super().test_factorize(data_for_grouping, na_sentinel)

    @pytest.mark.parametrize("na_sentinel", [-1, -2])
    def test_factorize_equivalence(self, data_for_grouping, na_sentinel, request):
        pa_dtype = data_for_grouping.dtype.pyarrow_dtype
        if pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request,
                f"dictionary_encode has no pyarrow kernel for {pa_dtype}",
                raises=pa.ArrowNotImplementedError,
            )
        super().test_factorize_equivalence(data_for_grouping, na_sentinel)

    def test_factorize_empty(self, data, request):
        pa_dtype = data.dtype.pyarrow_dtype
        if pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request,
                f"dictionary_encode has no pyarrow kernel for {pa_dtype}",
                raises=pa.ArrowNotImplementedError,
            )
        super().test_factorize_empty(data)

    def test_fillna_copy_frame(self, data_missing, request, using_array_manager):
        pa_dtype = data_missing.dtype.pyarrow_dtype
        if using_array_manager and pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request, f"Checking ndim when using arraymanager with {pa_dtype}"
            )
        super().test_fillna_copy_frame(data_missing)

    def test_fillna_copy_series(self, data_missing, request, using_array_manager):
        pa_dtype = data_missing.dtype.pyarrow_dtype
        if using_array_manager and pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request, f"Checking ndim when using arraymanager with {pa_dtype}"
            )
        super().test_fillna_copy_series(data_missing)

    def test_shift_fill_value(self, data, request):
        pa_dtype = data.dtype.pyarrow_dtype
        # Types without a ``tz`` attribute are treated like tz=None.
        tz = getattr(pa_dtype, "tz", None)
        if pa_version_under2p0 and tz not in (None, "UTC"):
            self._mark_xfail(
                request, f"Not supported by pyarrow < 2.0 with timestamp type {tz}"
            )
        super().test_shift_fill_value(data)

    @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]])
    def test_repeat(self, data, repeats, as_series, use_numpy, request):
        pa_dtype = data.dtype.pyarrow_dtype
        tz = getattr(pa_dtype, "tz", None)
        # repeats == 0 is excluded from the marker.
        if pa_version_under2p0 and tz not in (None, "UTC") and repeats != 0:
            self._mark_xfail(
                request,
                f"Not supported by pyarrow < 2.0 with "
                f"timestamp type {tz} when repeats={repeats}",
            )
        super().test_repeat(data, repeats, as_series, use_numpy)

    def test_insert(self, data, request):
        pa_dtype = data.dtype.pyarrow_dtype
        tz = getattr(pa_dtype, "tz", None)
        if pa_version_under2p0 and tz not in (None, "UTC"):
            self._mark_xfail(
                request, f"Not supported by pyarrow < 2.0 with timestamp type {tz}"
            )
        super().test_insert(data)

    def test_combine_first(self, data, request, using_array_manager):
        pa_dtype = data.dtype.pyarrow_dtype
        tz = getattr(pa_dtype, "tz", None)
        if using_array_manager and pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request, f"Checking ndim when using arraymanager with {pa_dtype}"
            )
        elif pa_version_under2p0 and tz not in (None, "UTC"):
            self._mark_xfail(
                request, f"Not supported by pyarrow < 2.0 with timestamp type {tz}"
            )
        super().test_combine_first(data)

    @pytest.mark.parametrize("frame", [True, False])
    @pytest.mark.parametrize(
        "periods, indices",
        [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])],
    )
    def test_container_shift(
        self, data, frame, periods, indices, request, using_array_manager
    ):
        pa_dtype = data.dtype.pyarrow_dtype
        # Only the non-zero shifts are marked; periods == 0 is left alone.
        if (
            using_array_manager
            and pa.types.is_duration(pa_dtype)
            and periods in (-2, 2)
        ):
            self._mark_xfail(
                request,
                f"Checking ndim when using arraymanager with "
                f"{pa_dtype} and periods={periods}",
            )
        super().test_container_shift(data, frame, periods, indices)

    @pytest.mark.xfail(
        reason="result dtype pyarrow[bool] better than expected dtype object"
    )
    def test_combine_le(self, data_repeated):
        super().test_combine_le(data_repeated)

    def test_combine_add(self, data_repeated, request):
        # ``data_repeated`` is called with a count and iterated; the first
        # yielded array is only used to read the pyarrow type.
        pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype
        if pa.types.is_temporal(pa_dtype):
            self._mark_xfail(
                request,
                f"{pa_dtype} cannot be added to {pa_dtype}",
                raises=TypeError,
            )
        super().test_combine_add(data_repeated)

    def test_searchsorted(self, data_for_sorting, as_series, request):
        pa_dtype = data_for_sorting.dtype.pyarrow_dtype
        if pa.types.is_boolean(pa_dtype):
            self._mark_xfail(request, f"{pa_dtype} only has 2 unique possible values")
        super().test_searchsorted(data_for_sorting, as_series)

    def test_where_series(self, data, na_value, as_frame, request, using_array_manager):
        pa_dtype = data.dtype.pyarrow_dtype
        # The array-manager/duration check comes first, so that combination
        # gets the ndim reason instead of the temporal-cast reason.
        if using_array_manager and pa.types.is_duration(pa_dtype):
            self._mark_xfail(
                request, f"Checking ndim when using arraymanager with {pa_dtype}"
            )
        elif pa.types.is_temporal(pa_dtype):
            self._mark_xfail(
                request,
                f"Unsupported cast from double to {pa_dtype}",
                raises=pa.ArrowNotImplementedError,
            )
        super().test_where_series(data, na_value, as_frame)
1296
+
1297
+
991
1298
def test_arrowdtype_construct_from_string_type_with_unsupported_parameters ():
992
1299
with pytest .raises (NotImplementedError , match = "Passing pyarrow type" ):
993
1300
ArrowDtype .construct_from_string ("timestamp[s, tz=UTC][pyarrow]" )
0 commit comments