@@ -13,6 +13,7 @@
 
 from pandas._config import get_option
 
+from pandas.compat import PY310
 from pandas.compat.pyarrow import (
     pa_version_under2p0,
     pa_version_under5p0,
@@ -261,6 +262,7 @@ def test_options_py(df_compat, pa):
     check_round_trip(df_compat)
 
 
+@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
 def test_options_fp(df_compat, fp):
     # use the set option
 
@@ -338,6 +340,7 @@ def test_get_engine_auto_error_message():
         get_engine("auto")
 
 
+@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
 def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
     # cross-compat with differing reading/writing engines
 
@@ -404,7 +407,11 @@ def test_error(self, engine):
             msg = "to_parquet only supports IO with DataFrames"
             self.check_error_on_write(obj, engine, ValueError, msg)
 
-    def test_columns_dtypes(self, engine):
+    def test_columns_dtypes(self, request, engine):
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
 
         # unicode
@@ -431,27 +438,39 @@ def test_columns_dtypes_invalid(self, engine):
         self.check_error_on_write(df, engine, ValueError, msg)
 
     @pytest.mark.parametrize("compression", [None, "gzip", "snappy", "brotli"])
-    def test_compression(self, engine, compression):
+    def test_compression(self, engine, compression, request):
 
         if compression == "snappy":
            pytest.importorskip("snappy")
 
         elif compression == "brotli":
             pytest.importorskip("brotli")
 
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         df = pd.DataFrame({"A": [1, 2, 3]})
         check_round_trip(df, engine, write_kwargs={"compression": compression})
 
-    def test_read_columns(self, engine):
+    def test_read_columns(self, engine, request):
         # GH18154
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
 
         expected = pd.DataFrame({"string": list("abc")})
         check_round_trip(
             df, engine, expected=expected, read_kwargs={"columns": ["string"]}
         )
 
-    def test_write_index(self, engine):
+    def test_write_index(self, engine, request):
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         check_names = engine != "fastparquet"
 
         df = pd.DataFrame({"A": [1, 2, 3]})
@@ -500,9 +519,13 @@ def test_multiindex_with_columns(self, pa):
             df, engine, read_kwargs={"columns": ["A", "B"]}, expected=df[["A", "B"]]
         )
 
-    def test_write_ignoring_index(self, engine):
+    def test_write_ignoring_index(self, engine, request):
         # ENH 20768
         # Ensure index=False omits the index from the written Parquet file.
+        if PY310 and engine == "fastparquet":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="fastparquet failing on 3.10")
+            )
         df = pd.DataFrame({"a": [1, 2, 3], "b": ["q", "r", "s"]})
 
         write_kwargs = {"compression": None, "index": False}
@@ -958,6 +981,7 @@ def test_read_parquet_manager(self, pa, using_array_manager):
 
 
 class TestParquetFastParquet(Base):
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_basic(self, fp, df_full):
         df = df_full
 
@@ -975,6 +999,7 @@ def test_duplicate_columns(self, fp):
         msg = "Cannot create parquet dataset with duplicate column names"
         self.check_error_on_write(df, fp, ValueError, msg)
 
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_bool_with_none(self, fp):
         df = pd.DataFrame({"a": [True, None, False]})
         expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16")
@@ -994,10 +1019,12 @@ def test_unsupported(self, fp):
         msg = "Can't infer object conversion type"
         self.check_error_on_write(df, fp, ValueError, msg)
 
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_categorical(self, fp):
         df = pd.DataFrame({"a": pd.Categorical(list("abc"))})
         check_round_trip(df, fp)
 
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_filter_row_groups(self, fp):
         d = {"a": list(range(0, 3))}
         df = pd.DataFrame(d)
@@ -1016,6 +1043,7 @@ def test_s3_roundtrip(self, df_compat, s3_resource, fp, s3so):
             write_kwargs={"compression": None, "storage_options": s3so},
         )
 
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_cols_supported(self, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
@@ -1033,6 +1061,7 @@ def test_partition_cols_supported(self, fp, df_full):
             actual_partition_cols = fastparquet.ParquetFile(path, False).cats
             assert len(actual_partition_cols) == 2
 
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_cols_string(self, fp, df_full):
         # GH #27117
         partition_cols = "bool"
@@ -1050,6 +1079,7 @@ def test_partition_cols_string(self, fp, df_full):
             actual_partition_cols = fastparquet.ParquetFile(path, False).cats
             assert len(actual_partition_cols) == 1
 
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_partition_on_supported(self, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
@@ -1085,13 +1115,15 @@ def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
                     partition_cols=partition_cols,
                 )
 
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_empty_dataframe(self, fp):
         # GH #27339
         df = pd.DataFrame()
         expected = df.copy()
         expected.index.name = "index"
         check_round_trip(df, fp, expected=expected)
 
+    @pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
     def test_timezone_aware_index(self, fp, timezone_aware_date_list):
         idx = 5 * [timezone_aware_date_list]
 
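Note: the diff applies the same expected failure in two forms. Tests that only exercise fastparquet (the fp fixture) get a static @pytest.mark.xfail(PY310, ...) decorator, while tests parametrized over both engines attach the marker at run time via request.node.add_marker, so the pyarrow runs still report as ordinary passes. A minimal self-contained sketch of both patterns follows; the PY310 constant here mirrors pandas.compat.PY310 (a sys.version_info check), and the test names and bodies are placeholders, not code from the PR:

import sys

import pytest

# Stand-in for pandas.compat.PY310: True on Python 3.10 or newer.
PY310 = sys.version_info >= (3, 10)


# Static form: the whole test is expected to fail whenever the condition holds.
@pytest.mark.xfail(PY310, reason="fastparquet failing on 3.10")
def test_fastparquet_only():
    ...


# Dynamic form: mark only the failing engine at run time, leaving the other
# parametrized runs unaffected.
@pytest.mark.parametrize("engine", ["pyarrow", "fastparquet"])
def test_both_engines(request, engine):
    if PY310 and engine == "fastparquet":
        request.node.add_marker(
            pytest.mark.xfail(reason="fastparquet failing on 3.10")
        )
    ...

Using xfail rather than skip keeps the tests running: once fastparquet gains 3.10 support, the unexpected passes (XPASS) surface in CI as a signal to remove the markers.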