8
8
import numpy as np
9
9
import pytest
10
10
11
- from pandas ._config import using_string_dtype
12
-
13
11
from pandas ._libs .parsers import STR_NA_VALUES
14
12
15
13
from pandas import (
@@ -261,7 +259,6 @@ def test_na_value_dict_multi_index(all_parsers, index_col, expected):
261
259
tm .assert_frame_equal (result , expected )
262
260
263
261
264
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
265
262
@pytest .mark .parametrize (
266
263
"kwargs,expected" ,
267
264
[
@@ -299,7 +296,9 @@ def test_na_value_dict_multi_index(all_parsers, index_col, expected):
299
296
),
300
297
],
301
298
)
302
- def test_na_values_keep_default (all_parsers , kwargs , expected , request ):
299
+ def test_na_values_keep_default (
300
+ all_parsers , kwargs , expected , request , using_infer_string
301
+ ):
303
302
data = """\
304
303
A,B,C
305
304
a,1,one
@@ -317,8 +316,9 @@ def test_na_values_keep_default(all_parsers, kwargs, expected, request):
317
316
with pytest .raises (ValueError , match = msg ):
318
317
parser .read_csv (StringIO (data ), ** kwargs )
319
318
return
320
- mark = pytest .mark .xfail ()
321
- request .applymarker (mark )
319
+ if not using_infer_string or "na_values" in kwargs :
320
+ mark = pytest .mark .xfail ()
321
+ request .applymarker (mark )
322
322
323
323
result = parser .read_csv (StringIO (data ), ** kwargs )
324
324
expected = DataFrame (expected )
@@ -429,23 +429,28 @@ def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_v
429
429
tm .assert_frame_equal (result , expected )
430
430
431
431
432
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
433
- @xfail_pyarrow # mismatched dtypes in both cases, FutureWarning in the True case
434
432
@pytest .mark .parametrize (
435
433
"na_filter,row_data" ,
436
434
[
437
435
(True , [[1 , "A" ], [np .nan , np .nan ], [3 , "C" ]]),
438
436
(False , [["1" , "A" ], ["nan" , "B" ], ["3" , "C" ]]),
439
437
],
440
438
)
441
- def test_na_values_na_filter_override (all_parsers , na_filter , row_data ):
439
+ def test_na_values_na_filter_override (
440
+ request , all_parsers , na_filter , row_data , using_infer_string
441
+ ):
442
+ parser = all_parsers
443
+ if parser .engine == "pyarrow" :
444
+ # mismatched dtypes in both cases, FutureWarning in the True case
445
+ if not (using_infer_string and na_filter ):
446
+ mark = pytest .mark .xfail (reason = "pyarrow doesn't support this." )
447
+ request .applymarker (mark )
442
448
data = """\
443
449
A,B
444
450
1,A
445
451
nan,B
446
452
3,C
447
453
"""
448
- parser = all_parsers
449
454
result = parser .read_csv (StringIO (data ), na_values = ["B" ], na_filter = na_filter )
450
455
451
456
expected = DataFrame (row_data , columns = ["A" , "B" ])
@@ -536,7 +541,6 @@ def test_na_values_dict_aliasing(all_parsers):
536
541
tm .assert_dict_equal (na_values , na_values_copy )
537
542
538
543
539
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
540
544
def test_na_values_dict_null_column_name (all_parsers ):
541
545
# see gh-57547
542
546
parser = all_parsers
@@ -560,11 +564,10 @@ def test_na_values_dict_null_column_name(all_parsers):
560
564
return
561
565
562
566
expected = DataFrame (
563
- {None : ["MA" , "NA" , "OA" ], "x" : [1.0 , 2.0 , np .nan ], "y" : [2.0 , 1.0 , 3.0 ]}
567
+ {"x" : [1.0 , 2.0 , np .nan ], "y" : [2.0 , 1.0 , 3.0 ]},
568
+ index = Index (["MA" , "NA" , "OA" ], dtype = object ),
564
569
)
565
570
566
- expected = expected .set_index (None )
567
-
568
571
result = parser .read_csv (
569
572
StringIO (data ),
570
573
index_col = 0 ,
0 commit comments