@@ -13,7 +13,10 @@

 from pandas._config import get_option

-from pandas.compat import is_platform_windows
+from pandas.compat import (
+    PY310,
+    is_platform_windows,
+)
 from pandas.compat.pyarrow import (
     pa_version_under2p0,
     pa_version_under5p0,
@@ -262,6 +265,7 @@ def test_options_py(df_compat, pa):
     check_round_trip(df_compat)


+@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
 def test_options_fp(df_compat, fp):
     # use the set option

@@ -339,6 +343,7 @@ def test_get_engine_auto_error_message():
            get_engine("auto")


+@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
 def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
     # cross-compat with differing reading/writing engines

@@ -404,7 +409,11 @@ def test_error(self, engine):
         msg = "to_parquet only supports IO with DataFrames"
         self.check_error_on_write(obj, engine, ValueError, msg)

-    def test_columns_dtypes(self, engine):
+    def test_columns_dtypes(self, request, engine):
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})

         # unicode
@@ -431,27 +440,39 @@ def test_columns_dtypes_invalid(self, engine):
         self.check_error_on_write(df, engine, ValueError, msg)

     @pytest.mark.parametrize("compression", [None, "gzip", "snappy", "brotli"])
-    def test_compression(self, engine, compression):
+    def test_compression(self, engine, compression, request):

         if compression == "snappy":
             pytest.importorskip("snappy")

         elif compression == "brotli":
             pytest.importorskip("brotli")

+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         df = pd.DataFrame({"A": [1, 2, 3]})
         check_round_trip(df, engine, write_kwargs={"compression": compression})

-    def test_read_columns(self, engine):
+    def test_read_columns(self, engine, request):
         # GH18154
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})

         expected = pd.DataFrame({"string": list("abc")})
         check_round_trip(
             df, engine, expected=expected, read_kwargs={"columns": ["string"]}
         )

-    def test_write_index(self, engine):
+    def test_write_index(self, engine, request):
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         check_names = engine != "fastparquet"

         df = pd.DataFrame({"A": [1, 2, 3]})
@@ -500,9 +521,13 @@ def test_multiindex_with_columns(self, pa):
             df, engine, read_kwargs={"columns": ["A", "B"]}, expected=df[["A", "B"]]
         )

-    def test_write_ignoring_index(self, engine):
+    def test_write_ignoring_index(self, engine, request):
         # ENH 20768
         # Ensure index=False omits the index from the written Parquet file.
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         df = pd.DataFrame({"a": [1, 2, 3], "b": ["q", "r", "s"]})

         write_kwargs = {"compression": None, "index": False}
@@ -986,6 +1011,7 @@ def test_read_parquet_manager(self, pa, using_array_manager):


 class TestParquetFastParquet(Base):
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_basic(self, fp, df_full):
         df = df_full

@@ -1003,6 +1029,7 @@ def test_duplicate_columns(self, fp):
         msg = "Cannot create parquet dataset with duplicate column names"
         self.check_error_on_write(df, fp, ValueError, msg)

+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_bool_with_none(self, fp):
         df = pd.DataFrame({"a": [True, None, False]})
         expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16")
@@ -1022,10 +1049,12 @@ def test_unsupported(self, fp):
         msg = "Can't infer object conversion type"
         self.check_error_on_write(df, fp, ValueError, msg)

+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_categorical(self, fp):
         df = pd.DataFrame({"a": pd.Categorical(list("abc"))})
         check_round_trip(df, fp)

+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_filter_row_groups(self, fp):
         d = {"a": list(range(0, 3))}
         df = pd.DataFrame(d)
@@ -1044,6 +1073,7 @@ def test_s3_roundtrip(self, df_compat, s3_resource, fp, s3so):
             write_kwargs={"compression": None, "storage_options": s3so},
         )

+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_cols_supported(self, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
@@ -1061,6 +1091,7 @@ def test_partition_cols_supported(self, fp, df_full):
             actual_partition_cols = fastparquet.ParquetFile(path, False).cats
             assert len(actual_partition_cols) == 2

+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_cols_string(self, fp, df_full):
         # GH #27117
         partition_cols = "bool"
@@ -1078,6 +1109,7 @@ def test_partition_cols_string(self, fp, df_full):
             actual_partition_cols = fastparquet.ParquetFile(path, False).cats
             assert len(actual_partition_cols) == 1

+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_on_supported(self, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
@@ -1113,13 +1145,15 @@ def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
                 partition_cols=partition_cols,
             )

+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_empty_dataframe(self, fp):
         # GH #27339
         df = pd.DataFrame()
         expected = df.copy()
         expected.index.name = "index"
         check_round_trip(df, fp, expected=expected)

+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_timezone_aware_index(self, fp, timezone_aware_date_list):
         idx = 5 * [timezone_aware_date_list]

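Note on the two marking idioms in this change: tests that only ever run against fastparquet get a plain @pytest.mark.xfail(PY310, ...) decorator, while tests parametrized over both engines attach the marker at runtime through pytest's request fixture, so only the fastparquet runs are affected. Below is a minimal, self-contained sketch of both patterns; the dummy tests and the engine fixture are hypothetical, and PY310 is given an equivalent inline definition instead of being imported from pandas.compat.

import sys

import pytest

# Assumption: pandas.compat.PY310 is equivalent to this version check.
PY310 = sys.version_info >= (3, 10)


# Pattern 1: the condition is known at collection time, so a plain
# decorator suffices; the test still runs, but a failure is expected.
@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
def test_decorator_form():
    assert True


@pytest.fixture(params=["pyarrow", "fastparquet"])
def engine(request):
    # Hypothetical stand-in for the module's engine fixtures.
    return request.param


# Pattern 2: the condition depends on a fixture value, which a decorator
# cannot see, so the marker is attached to the running test item instead.
def test_dynamic_form(engine, request):
    if PY310 and engine == "fastparquet":
        request.node.add_marker(
            pytest.mark.xfail(reason="fastparquet failing on 3.10")
        )
    assert True

The dynamic form is why test_compression and the other engine-parametrized tests gain a request parameter in this diff: a decorator on those tests would also xfail their pyarrow runs.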