52
52
pytest .mark .filterwarnings (
53
53
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
54
54
),
55
- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
56
55
]
57
56
58
57
61
60
params = [
62
61
pytest .param (
63
62
"fastparquet" ,
64
- marks = pytest .mark .skipif (
65
- not _HAVE_FASTPARQUET ,
66
- reason = "fastparquet is not installed" ,
67
- ),
63
+ marks = [
64
+ pytest .mark .skipif (
65
+ not _HAVE_FASTPARQUET ,
66
+ reason = "fastparquet is not installed" ,
67
+ ),
68
+ pytest .mark .xfail (
69
+ using_string_dtype (),
70
+ reason = "TODO(infer_string) fastparquet" ,
71
+ strict = False ,
72
+ ),
73
+ ],
68
74
),
69
75
pytest .param (
70
76
"pyarrow" ,
@@ -86,15 +92,22 @@ def pa():
86
92
87
93
88
94
@pytest .fixture
89
- def fp ():
95
+ def fp (request ):
90
96
if not _HAVE_FASTPARQUET :
91
97
pytest .skip ("fastparquet is not installed" )
98
+ if using_string_dtype ():
99
+ request .applymarker (
100
+ pytest .mark .xfail (reason = "TODO(infer_string) fastparquet" , strict = False )
101
+ )
92
102
return "fastparquet"
93
103
94
104
95
105
@pytest .fixture
96
106
def df_compat ():
97
- return pd .DataFrame ({"A" : [1 , 2 , 3 ], "B" : "foo" })
107
+ # TODO(infer_string) should this give str columns?
108
+ return pd .DataFrame (
109
+ {"A" : [1 , 2 , 3 ], "B" : "foo" }, columns = pd .Index (["A" , "B" ], dtype = object )
110
+ )
98
111
99
112
100
113
@pytest .fixture
@@ -366,16 +379,6 @@ def check_external_error_on_write(self, df, engine, exc):
366
379
with tm .external_error_raised (exc ):
367
380
to_parquet (df , path , engine , compression = None )
368
381
369
- @pytest .mark .network
370
- @pytest .mark .single_cpu
371
- def test_parquet_read_from_url (self , httpserver , datapath , df_compat , engine ):
372
- if engine != "auto" :
373
- pytest .importorskip (engine )
374
- with open (datapath ("io" , "data" , "parquet" , "simple.parquet" ), mode = "rb" ) as f :
375
- httpserver .serve_content (content = f .read ())
376
- df = read_parquet (httpserver .url )
377
- tm .assert_frame_equal (df , df_compat )
378
-
379
382
380
383
class TestBasic (Base ):
381
384
def test_error (self , engine ):
@@ -673,6 +676,16 @@ def test_read_empty_array(self, pa, dtype):
673
676
df , pa , read_kwargs = {"dtype_backend" : "numpy_nullable" }, expected = expected
674
677
)
675
678
679
+ @pytest .mark .network
680
+ @pytest .mark .single_cpu
681
+ def test_parquet_read_from_url (self , httpserver , datapath , df_compat , engine ):
682
+ if engine != "auto" :
683
+ pytest .importorskip (engine )
684
+ with open (datapath ("io" , "data" , "parquet" , "simple.parquet" ), mode = "rb" ) as f :
685
+ httpserver .serve_content (content = f .read ())
686
+ df = read_parquet (httpserver .url , engine = engine )
687
+ tm .assert_frame_equal (df , df_compat )
688
+
676
689
677
690
class TestParquetPyArrow (Base ):
678
691
@pytest .mark .xfail (reason = "datetime_with_nat unit doesn't round-trip" )
@@ -906,7 +919,7 @@ def test_write_with_schema(self, pa):
906
919
out_df = df .astype (bool )
907
920
check_round_trip (df , pa , write_kwargs = {"schema" : schema }, expected = out_df )
908
921
909
- def test_additional_extension_arrays (self , pa ):
922
+ def test_additional_extension_arrays (self , pa , using_infer_string ):
910
923
# test additional ExtensionArrays that are supported through the
911
924
# __arrow_array__ protocol
912
925
pytest .importorskip ("pyarrow" )
@@ -917,17 +930,25 @@ def test_additional_extension_arrays(self, pa):
917
930
"c" : pd .Series (["a" , None , "c" ], dtype = "string" ),
918
931
}
919
932
)
920
- check_round_trip (df , pa )
933
+ if using_infer_string :
934
+ check_round_trip (df , pa , expected = df .astype ({"c" : "str" }))
935
+ else :
936
+ check_round_trip (df , pa )
921
937
922
938
df = pd .DataFrame ({"a" : pd .Series ([1 , 2 , 3 , None ], dtype = "Int64" )})
923
939
check_round_trip (df , pa )
924
940
925
- def test_pyarrow_backed_string_array (self , pa , string_storage ):
941
+ def test_pyarrow_backed_string_array (self , pa , string_storage , using_infer_string ):
926
942
# test ArrowStringArray supported through the __arrow_array__ protocol
927
943
pytest .importorskip ("pyarrow" )
928
944
df = pd .DataFrame ({"a" : pd .Series (["a" , None , "c" ], dtype = "string[pyarrow]" )})
929
945
with pd .option_context ("string_storage" , string_storage ):
930
- check_round_trip (df , pa , expected = df .astype (f"string[{ string_storage } ]" ))
946
+ if using_infer_string :
947
+ expected = df .astype ("str" )
948
+ expected .columns = expected .columns .astype ("str" )
949
+ else :
950
+ expected = df .astype (f"string[{ string_storage } ]" )
951
+ check_round_trip (df , pa , expected = expected )
931
952
932
953
def test_additional_extension_types (self , pa ):
933
954
# test additional ExtensionArrays that are supported through the
0 commit comments