17
17
)
18
18
19
19
20
def na_val(dtype):
    """Return the missing-value sentinel the tests expect for ``dtype``.

    ``np.nan`` is returned when ``dtype.storage`` is ``"pyarrow_numpy"``;
    ``pd.NA`` is returned for every other storage.
    """
    uses_nan = dtype.storage == "pyarrow_numpy"
    return np.nan if uses_nan else pd.NA
25
+
26
+
20
27
@pytest .fixture
21
28
def dtype (string_storage ):
22
29
"""Fixture giving StringDtype from parametrized 'string_storage'"""
@@ -31,26 +38,34 @@ def cls(dtype):
31
38
32
39
def test_repr (dtype ):
# Checks three reprs of a string-dtype column built from ["a", pd.NA, "b"]:
# the DataFrame, the Series `df.A`, and the backing array `df.A.array`.
# NOTE(review): this span is a rendered diff -- "+"/"-" prefixes mark added and
# removed lines, and the bare integers are old/new line numbers, not code.
# NOTE(review): whitespace inside the expected-string literals looks collapsed
# by extraction; confirm the exact column alignment against the real file.
33
40
df = pd .DataFrame ({"A" : pd .array (["a" , pd .NA , "b" ], dtype = dtype )})
34
- expected = " A\n 0 a\n 1 <NA>\n 2 b"
41
# "pyarrow_numpy" storage is expected to render the missing entry as NaN;
# every other storage is expected to render it as <NA>.
+ if dtype .storage == "pyarrow_numpy" :
42
+ expected = " A\n 0 a\n 1 NaN\n 2 b"
43
+ else :
44
+ expected = " A\n 0 a\n 1 <NA>\n 2 b"
35
45
assert repr (df ) == expected
36
46
37
- expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
47
# Same NaN-vs-<NA> split for the Series repr.
+ if dtype .storage == "pyarrow_numpy" :
48
+ expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: string"
49
+ else :
50
+ expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
38
51
assert repr (df .A ) == expected
39
52
40
53
# Array repr: both the class name and the missing-value text depend on
# dtype.storage, so `expected` is now assigned inside each branch.
if dtype .storage == "pyarrow" :
41
54
arr_name = "ArrowStringArray"
55
+ expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
42
56
elif dtype .storage == "pyarrow_numpy" :
43
57
arr_name = "ArrowStringArrayNumpySemantics"
58
+ expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string"
44
59
else :
45
60
arr_name = "StringArray"
46
- expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
61
+ expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
47
62
assert repr (df .A .array ) == expected
48
63
49
64
50
65
def test_none_to_nan(cls):
    """A ``None`` passed to ``cls._from_sequence`` is stored as the dtype's NA value.

    The stored element must not be ``None`` itself and must be the exact
    sentinel object that ``na_val`` returns for the array's dtype.
    """
    arr = cls._from_sequence(["a", None, "b"])
    missing_pos = 1  # position of the ``None`` in the input sequence
    assert arr[missing_pos] is not None
    assert arr[missing_pos] is na_val(arr.dtype)
54
69
55
70
56
71
def test_setitem_validates (cls ):
@@ -213,13 +228,9 @@ def test_comparison_methods_scalar(comparison_op, dtype):
213
228
other = "a"
214
229
result = getattr (a , op_name )(other )
215
230
if dtype .storage == "pyarrow_numpy" :
216
- expected = np .array ([getattr (item , op_name )(other ) for item in a ], dtype = object )
217
- expected = (
218
- pd .array (expected , dtype = "boolean" )
219
- .to_numpy (na_value = False )
220
- .astype (np .bool_ )
221
- )
222
- tm .assert_numpy_array_equal (result , expected )
231
+ expected = np .array ([getattr (item , op_name )(other ) for item in a ])
232
+ expected [1 ] = False
233
+ tm .assert_numpy_array_equal (result , expected .astype (np .bool_ ))
223
234
else :
224
235
expected_dtype = "boolean[pyarrow]" if dtype .storage == "pyarrow" else "boolean"
225
236
expected = np .array ([getattr (item , op_name )(other ) for item in a ], dtype = object )
@@ -415,7 +426,7 @@ def test_min_max(method, skipna, dtype, request):
415
426
expected = "a" if method == "min" else "c"
416
427
assert result == expected
417
428
else :
418
- assert result is pd . NA
429
+ assert result is na_val ( arr . dtype )
419
430
420
431
421
432
@pytest .mark .parametrize ("method" , ["min" , "max" ])
@@ -483,7 +494,7 @@ def test_arrow_roundtrip(dtype, string_storage2):
483
494
expected = df .astype (f"string[{ string_storage2 } ]" )
484
495
tm .assert_frame_equal (result , expected )
485
496
# ensure the missing value is the dtype's NA sentinel (pd.NA, or np.nan for "pyarrow_numpy" storage) and not None
486
- assert result .loc [2 , "a" ] is pd . NA
497
+ assert result .loc [2 , "a" ] is na_val ( result [ "a" ]. dtype )
487
498
488
499
489
500
def test_arrow_load_from_zero_chunks (dtype , string_storage2 ):
@@ -581,7 +592,7 @@ def test_astype_from_float_dtype(float_dtype, dtype):
581
592
def test_to_numpy_returns_pdna_default(dtype):
    """``np.array`` on a string array keeps the dtype's missing-value sentinel.

    The converted result is compared with an object-dtype array whose middle
    entry is ``na_val(dtype)``.
    """
    values = pd.array(["a", pd.NA, "b"], dtype=dtype)
    tm.assert_numpy_array_equal(
        np.array(values),
        np.array(["a", na_val(dtype), "b"], dtype=object),
    )
586
597
587
598
@@ -621,7 +632,7 @@ def test_setitem_scalar_with_mask_validation(dtype):
621
632
mask = np .array ([False , True , False ])
622
633
623
634
ser [mask ] = None
624
- assert ser .array [1 ] is pd . NA
635
+ assert ser .array [1 ] is na_val ( ser . dtype )
625
636
626
637
# for other non-string we should also raise an error
627
638
ser = pd .Series (["a" , "b" , "c" ], dtype = dtype )
0 commit comments