17
17
)
18
18
19
19
20
+ def na_val (dtype ):
21
+ if dtype .storage == "pyarrow_numpy" :
22
+ return np .nan
23
+ else :
24
+ return pd .NA
25
+
26
+
20
27
@pytest .fixture
21
28
def dtype (string_storage ):
22
29
"""Fixture giving StringDtype from parametrized 'string_storage'"""
@@ -31,26 +38,34 @@ def cls(dtype):
31
38
32
39
def test_repr (dtype ):
33
40
df = pd .DataFrame ({"A" : pd .array (["a" , pd .NA , "b" ], dtype = dtype )})
34
- expected = " A\n 0 a\n 1 <NA>\n 2 b"
41
+ if dtype .storage == "pyarrow_numpy" :
42
+ expected = " A\n 0 a\n 1 NaN\n 2 b"
43
+ else :
44
+ expected = " A\n 0 a\n 1 <NA>\n 2 b"
35
45
assert repr (df ) == expected
36
46
37
- expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
47
+ if dtype .storage == "pyarrow_numpy" :
48
+ expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: string"
49
+ else :
50
+ expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
38
51
assert repr (df .A ) == expected
39
52
40
53
if dtype .storage == "pyarrow" :
41
54
arr_name = "ArrowStringArray"
55
+ expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
42
56
elif dtype .storage == "pyarrow_numpy" :
43
57
arr_name = "ArrowStringArrayNumpySemantics"
58
+ expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string"
44
59
else :
45
60
arr_name = "StringArray"
46
- expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
61
+ expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
47
62
assert repr (df .A .array ) == expected
48
63
49
64
50
65
def test_none_to_nan (cls ):
51
66
a = cls ._from_sequence (["a" , None , "b" ])
52
67
assert a [1 ] is not None
53
- assert a [1 ] is pd . NA
68
+ assert a [1 ] is na_val ( a . dtype )
54
69
55
70
56
71
def test_setitem_validates (cls ):
@@ -205,13 +220,9 @@ def test_comparison_methods_scalar(comparison_op, dtype):
205
220
other = "a"
206
221
result = getattr (a , op_name )(other )
207
222
if dtype .storage == "pyarrow_numpy" :
208
- expected = np .array ([getattr (item , op_name )(other ) for item in a ], dtype = object )
209
- expected = (
210
- pd .array (expected , dtype = "boolean" )
211
- .to_numpy (na_value = False )
212
- .astype (np .bool_ )
213
- )
214
- tm .assert_numpy_array_equal (result , expected )
223
+ expected = np .array ([getattr (item , op_name )(other ) for item in a ])
224
+ expected [1 ] = False
225
+ tm .assert_numpy_array_equal (result , expected .astype (np .bool_ ))
215
226
else :
216
227
expected_dtype = "boolean[pyarrow]" if dtype .storage == "pyarrow" else "boolean"
217
228
expected = np .array ([getattr (item , op_name )(other ) for item in a ], dtype = object )
@@ -407,7 +418,7 @@ def test_min_max(method, skipna, dtype, request):
407
418
expected = "a" if method == "min" else "c"
408
419
assert result == expected
409
420
else :
410
- assert result is pd . NA
421
+ assert result is na_val ( arr . dtype )
411
422
412
423
413
424
@pytest .mark .parametrize ("method" , ["min" , "max" ])
@@ -475,7 +486,7 @@ def test_arrow_roundtrip(dtype, string_storage2):
475
486
expected = df .astype (f"string[{ string_storage2 } ]" )
476
487
tm .assert_frame_equal (result , expected )
477
488
# ensure the missing value is represented by NA and not np.nan or None
478
- assert result .loc [2 , "a" ] is pd . NA
489
+ assert result .loc [2 , "a" ] is na_val ( result [ "a" ]. dtype )
479
490
480
491
481
492
def test_arrow_load_from_zero_chunks (dtype , string_storage2 ):
@@ -573,7 +584,7 @@ def test_astype_from_float_dtype(float_dtype, dtype):
573
584
def test_to_numpy_returns_pdna_default (dtype ):
574
585
arr = pd .array (["a" , pd .NA , "b" ], dtype = dtype )
575
586
result = np .array (arr )
576
- expected = np .array (["a" , pd . NA , "b" ], dtype = object )
587
+ expected = np .array (["a" , na_val ( dtype ) , "b" ], dtype = object )
577
588
tm .assert_numpy_array_equal (result , expected )
578
589
579
590
@@ -613,7 +624,7 @@ def test_setitem_scalar_with_mask_validation(dtype):
613
624
mask = np .array ([False , True , False ])
614
625
615
626
ser [mask ] = None
616
- assert ser .array [1 ] is pd . NA
627
+ assert ser .array [1 ] is na_val ( ser . dtype )
617
628
618
629
# for other non-string we should also raise an error
619
630
ser = pd .Series (["a" , "b" , "c" ], dtype = dtype )
0 commit comments