@@ -66,7 +66,7 @@ def test_repr(dtype):
66
66
assert repr (df ) == expected
67
67
68
68
if dtype .na_value is np .nan :
69
- expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: string "
69
+ expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: str "
70
70
else :
71
71
expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
72
72
assert repr (df .A ) == expected
@@ -76,10 +76,10 @@ def test_repr(dtype):
76
76
expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
77
77
elif dtype .storage == "pyarrow" and dtype .na_value is np .nan :
78
78
arr_name = "ArrowStringArrayNumpySemantics"
79
- expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string "
79
+ expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: str "
80
80
elif dtype .storage == "python" and dtype .na_value is np .nan :
81
81
arr_name = "StringArrayNumpySemantics"
82
- expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string "
82
+ expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: str "
83
83
else :
84
84
arr_name = "StringArray"
85
85
expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
@@ -500,7 +500,7 @@ def test_fillna_args(dtype):
500
500
tm .assert_extension_array_equal (res , expected )
501
501
502
502
if dtype .storage == "pyarrow" :
503
- msg = "Invalid value '1' for dtype string "
503
+ msg = "Invalid value '1' for dtype str "
504
504
else :
505
505
msg = "Cannot set non-string value '1' into a StringArray."
506
506
with pytest .raises (TypeError , match = msg ):
@@ -522,7 +522,7 @@ def test_arrow_array(dtype):
522
522
assert arr .equals (expected )
523
523
524
524
525
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
525
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
526
526
@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
527
527
def test_arrow_roundtrip (dtype , string_storage , using_infer_string ):
528
528
# roundtrip possible from arrow 1.0.0
@@ -537,14 +537,17 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
537
537
assert table .field ("a" ).type == "large_string"
538
538
with pd .option_context ("string_storage" , string_storage ):
539
539
result = table .to_pandas ()
540
- assert isinstance (result ["a" ].dtype , pd .StringDtype )
541
- expected = df .astype (f"string[{ string_storage } ]" )
542
- tm .assert_frame_equal (result , expected )
543
- # ensure the missing value is represented by NA and not np.nan or None
544
- assert result .loc [2 , "a" ] is result ["a" ].dtype .na_value
540
+ if dtype .na_value is np .nan and not using_string_dtype ():
541
+ assert result ["a" ].dtype == "object"
542
+ else :
543
+ assert isinstance (result ["a" ].dtype , pd .StringDtype )
544
+ expected = df .astype (f"string[{ string_storage } ]" )
545
+ tm .assert_frame_equal (result , expected )
546
+ # ensure the missing value is represented by NA and not np.nan or None
547
+ assert result .loc [2 , "a" ] is result ["a" ].dtype .na_value
545
548
546
549
547
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
550
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
548
551
@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
549
552
def test_arrow_load_from_zero_chunks (dtype , string_storage , using_infer_string ):
550
553
# GH-41040
@@ -561,9 +564,13 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
561
564
table = pa .table ([pa .chunked_array ([], type = pa .string ())], schema = table .schema )
562
565
with pd .option_context ("string_storage" , string_storage ):
563
566
result = table .to_pandas ()
564
- assert isinstance (result ["a" ].dtype , pd .StringDtype )
565
- expected = df .astype (f"string[{ string_storage } ]" )
566
- tm .assert_frame_equal (result , expected )
567
+
568
+ if dtype .na_value is np .nan and not using_string_dtype ():
569
+ assert result ["a" ].dtype == "object"
570
+ else :
571
+ assert isinstance (result ["a" ].dtype , pd .StringDtype )
572
+ expected = df .astype (f"string[{ string_storage } ]" )
573
+ tm .assert_frame_equal (result , expected )
567
574
568
575
569
576
def test_value_counts_na (dtype ):
0 commit comments