11
11
)
12
12
13
13
14
- def test_string_array (any_string_method ):
14
+ def test_string_array (nullable_string_dtype , any_string_method , request ):
15
15
method_name , args , kwargs = any_string_method
16
16
if method_name == "decode" :
17
17
pytest .skip ("decode requires bytes." )
18
18
19
+ if nullable_string_dtype == "arrow_string" and method_name in {
20
+ "extract" ,
21
+ "extractall" ,
22
+ }:
23
+ reason = "extract/extractall does not yet dispatch to array"
24
+ mark = pytest .mark .xfail (reason = reason )
25
+ request .node .add_marker (mark )
26
+
19
27
data = ["a" , "bb" , np .nan , "ccc" ]
20
28
a = Series (data , dtype = object )
21
- b = Series (data , dtype = "string" )
29
+ b = Series (data , dtype = nullable_string_dtype )
22
30
23
31
expected = getattr (a .str , method_name )(* args , ** kwargs )
24
32
result = getattr (b .str , method_name )(* args , ** kwargs )
@@ -27,7 +35,7 @@ def test_string_array(any_string_method):
27
35
if expected .dtype == "object" and lib .is_string_array (
28
36
expected .dropna ().values ,
29
37
):
30
- assert result .dtype == "string"
38
+ assert result .dtype == nullable_string_dtype
31
39
result = result .astype (object )
32
40
33
41
elif expected .dtype == "object" and lib .is_bool_array (
@@ -46,7 +54,7 @@ def test_string_array(any_string_method):
46
54
47
55
elif isinstance (expected , DataFrame ):
48
56
columns = expected .select_dtypes (include = "object" ).columns
49
- assert all (result [columns ].dtypes == "string" )
57
+ assert all (result [columns ].dtypes == nullable_string_dtype )
50
58
result [columns ] = result [columns ].astype (object )
51
59
tm .assert_equal (result , expected )
52
60
@@ -60,8 +68,8 @@ def test_string_array(any_string_method):
60
68
("rindex" , [2 , None ]),
61
69
],
62
70
)
63
- def test_string_array_numeric_integer_array (method , expected ):
64
- s = Series (["aba" , None ], dtype = "string" )
71
+ def test_string_array_numeric_integer_array (nullable_string_dtype , method , expected ):
72
+ s = Series (["aba" , None ], dtype = nullable_string_dtype )
65
73
result = getattr (s .str , method )("a" )
66
74
expected = Series (expected , dtype = "Int64" )
67
75
tm .assert_series_equal (result , expected )
@@ -73,33 +81,39 @@ def test_string_array_numeric_integer_array(method, expected):
73
81
("isdigit" , [False , None , True ]),
74
82
("isalpha" , [True , None , False ]),
75
83
("isalnum" , [True , None , True ]),
76
- ("isdigit" , [False , None , True ]),
84
+ ("isnumeric" , [False , None , True ]),
77
85
],
78
86
)
79
- def test_string_array_boolean_array (method , expected ):
80
- s = Series (["a" , None , "1" ], dtype = "string" )
87
+ def test_string_array_boolean_array (nullable_string_dtype , method , expected ):
88
+ s = Series (["a" , None , "1" ], dtype = nullable_string_dtype )
81
89
result = getattr (s .str , method )()
82
90
expected = Series (expected , dtype = "boolean" )
83
91
tm .assert_series_equal (result , expected )
84
92
85
93
86
- def test_string_array_extract ():
94
+ def test_string_array_extract (nullable_string_dtype , request ):
87
95
# https://github.com/pandas-dev/pandas/issues/30969
88
96
# Only expand=False & multiple groups was failing
89
- a = Series (["a1" , "b2" , "cc" ], dtype = "string" )
97
+
98
+ if nullable_string_dtype == "arrow_string" :
99
+ reason = "extract does not yet dispatch to array"
100
+ mark = pytest .mark .xfail (reason = reason )
101
+ request .node .add_marker (mark )
102
+
103
+ a = Series (["a1" , "b2" , "cc" ], dtype = nullable_string_dtype )
90
104
b = Series (["a1" , "b2" , "cc" ], dtype = "object" )
91
105
pat = r"(\w)(\d)"
92
106
93
107
result = a .str .extract (pat , expand = False )
94
108
expected = b .str .extract (pat , expand = False )
95
- assert all (result .dtypes == "string" )
109
+ assert all (result .dtypes == nullable_string_dtype )
96
110
97
111
result = result .astype (object )
98
112
tm .assert_equal (result , expected )
99
113
100
114
101
- def test_str_get_stringarray_multiple_nans ():
102
- s = Series (pd .array (["a" , "ab" , pd .NA , "abc" ]))
115
+ def test_str_get_stringarray_multiple_nans (nullable_string_dtype ):
116
+ s = Series (pd .array (["a" , "ab" , pd .NA , "abc" ], dtype = nullable_string_dtype ))
103
117
result = s .str .get (2 )
104
- expected = Series (pd .array ([pd .NA , pd .NA , pd .NA , "c" ]))
118
+ expected = Series (pd .array ([pd .NA , pd .NA , pd .NA , "c" ], dtype = nullable_string_dtype ))
105
119
tm .assert_series_equal (result , expected )
0 commit comments