@@ -65,7 +65,7 @@ def test_repr(dtype):
65
65
assert repr (df ) == expected
66
66
67
67
if dtype .na_value is np .nan :
68
- expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: string "
68
+ expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: str "
69
69
else :
70
70
expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
71
71
assert repr (df .A ) == expected
@@ -75,10 +75,10 @@ def test_repr(dtype):
75
75
expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
76
76
elif dtype .storage == "pyarrow" and dtype .na_value is np .nan :
77
77
arr_name = "ArrowStringArrayNumpySemantics"
78
- expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string "
78
+ expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: str "
79
79
elif dtype .storage == "python" and dtype .na_value is np .nan :
80
80
arr_name = "StringArrayNumpySemantics"
81
- expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string "
81
+ expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: str "
82
82
else :
83
83
arr_name = "StringArray"
84
84
expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
@@ -502,7 +502,7 @@ def test_fillna_args(dtype):
502
502
tm .assert_extension_array_equal (res , expected )
503
503
504
504
if dtype .storage == "pyarrow" :
505
- msg = "Invalid value '1' for dtype string "
505
+ msg = "Invalid value '1' for dtype str "
506
506
else :
507
507
msg = "Cannot set non-string value '1' into a StringArray."
508
508
with pytest .raises (TypeError , match = msg ):
@@ -524,7 +524,7 @@ def test_arrow_array(dtype):
524
524
assert arr .equals (expected )
525
525
526
526
527
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
527
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
528
528
@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
529
529
def test_arrow_roundtrip (dtype , string_storage , using_infer_string ):
530
530
# roundtrip possible from arrow 1.0.0
@@ -539,14 +539,17 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
539
539
assert table .field ("a" ).type == "large_string"
540
540
with pd .option_context ("string_storage" , string_storage ):
541
541
result = table .to_pandas ()
542
- assert isinstance (result ["a" ].dtype , pd .StringDtype )
543
- expected = df .astype (f"string[{ string_storage } ]" )
544
- tm .assert_frame_equal (result , expected )
545
- # ensure the missing value is represented by NA and not np.nan or None
546
- assert result .loc [2 , "a" ] is result ["a" ].dtype .na_value
542
+ if dtype .na_value is np .nan and not using_string_dtype ():
543
+ assert result ["a" ].dtype == "object"
544
+ else :
545
+ assert isinstance (result ["a" ].dtype , pd .StringDtype )
546
+ expected = df .astype (f"string[{ string_storage } ]" )
547
+ tm .assert_frame_equal (result , expected )
548
+ # ensure the missing value is represented by NA and not np.nan or None
549
+ assert result .loc [2 , "a" ] is result ["a" ].dtype .na_value
547
550
548
551
549
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
552
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
550
553
@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
551
554
def test_arrow_load_from_zero_chunks (dtype , string_storage , using_infer_string ):
552
555
# GH-41040
@@ -563,9 +566,13 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
563
566
table = pa .table ([pa .chunked_array ([], type = pa .string ())], schema = table .schema )
564
567
with pd .option_context ("string_storage" , string_storage ):
565
568
result = table .to_pandas ()
566
- assert isinstance (result ["a" ].dtype , pd .StringDtype )
567
- expected = df .astype (f"string[{ string_storage } ]" )
568
- tm .assert_frame_equal (result , expected )
569
+
570
+ if dtype .na_value is np .nan and not using_string_dtype ():
571
+ assert result ["a" ].dtype == "object"
572
+ else :
573
+ assert isinstance (result ["a" ].dtype , pd .StringDtype )
574
+ expected = df .astype (f"string[{ string_storage } ]" )
575
+ tm .assert_frame_equal (result , expected )
569
576
570
577
571
578
def test_value_counts_na (dtype ):
0 commit comments