13
13
)
14
14
from pandas .tests .strings import (
15
15
_convert_na_value ,
16
- object_pyarrow_numpy ,
16
+ is_object_or_nan_string_dtype ,
17
17
)
18
18
19
19
# --------------------------------------------------------------------------------------
@@ -33,7 +33,9 @@ def test_contains(any_string_dtype):
33
33
pat = "mmm[_]+"
34
34
35
35
result = values .str .contains (pat )
36
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
36
+ expected_dtype = (
37
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
38
+ )
37
39
expected = Series (
38
40
np .array ([False , np .nan , True , True , False ], dtype = np .object_ ),
39
41
dtype = expected_dtype ,
@@ -52,7 +54,9 @@ def test_contains(any_string_dtype):
52
54
dtype = any_string_dtype ,
53
55
)
54
56
result = values .str .contains (pat )
55
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
57
+ expected_dtype = (
58
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
59
+ )
56
60
expected = Series (np .array ([False , False , True , True ]), dtype = expected_dtype )
57
61
tm .assert_series_equal (result , expected )
58
62
@@ -79,14 +83,18 @@ def test_contains(any_string_dtype):
79
83
pat = "mmm[_]+"
80
84
81
85
result = values .str .contains (pat )
82
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
86
+ expected_dtype = (
87
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
88
+ )
83
89
expected = Series (
84
90
np .array ([False , np .nan , True , True ], dtype = np .object_ ), dtype = expected_dtype
85
91
)
86
92
tm .assert_series_equal (result , expected )
87
93
88
94
result = values .str .contains (pat , na = False )
89
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
95
+ expected_dtype = (
96
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
97
+ )
90
98
expected = Series (np .array ([False , False , True , True ]), dtype = expected_dtype )
91
99
tm .assert_series_equal (result , expected )
92
100
@@ -171,7 +179,9 @@ def test_contains_moar(any_string_dtype):
171
179
)
172
180
173
181
result = s .str .contains ("a" )
174
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
182
+ expected_dtype = (
183
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
184
+ )
175
185
expected = Series (
176
186
[False , False , False , True , True , False , np .nan , False , False , True ],
177
187
dtype = expected_dtype ,
@@ -212,7 +222,9 @@ def test_contains_nan(any_string_dtype):
212
222
s = Series ([np .nan , np .nan , np .nan ], dtype = any_string_dtype )
213
223
214
224
result = s .str .contains ("foo" , na = False )
215
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
225
+ expected_dtype = (
226
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
227
+ )
216
228
expected = Series ([False , False , False ], dtype = expected_dtype )
217
229
tm .assert_series_equal (result , expected )
218
230
@@ -230,7 +242,9 @@ def test_contains_nan(any_string_dtype):
230
242
tm .assert_series_equal (result , expected )
231
243
232
244
result = s .str .contains ("foo" )
233
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
245
+ expected_dtype = (
246
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
247
+ )
234
248
expected = Series ([np .nan , np .nan , np .nan ], dtype = expected_dtype )
235
249
tm .assert_series_equal (result , expected )
236
250
@@ -675,7 +689,9 @@ def test_replace_regex_single_character(regex, any_string_dtype):
675
689
676
690
def test_match (any_string_dtype ):
677
691
# New match behavior introduced in 0.13
678
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
692
+ expected_dtype = (
693
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
694
+ )
679
695
680
696
values = Series (["fooBAD__barBAD" , np .nan , "foo" ], dtype = any_string_dtype )
681
697
result = values .str .match (".*(BAD[_]+).*(BAD)" )
@@ -730,20 +746,26 @@ def test_match_na_kwarg(any_string_dtype):
730
746
s = Series (["a" , "b" , np .nan ], dtype = any_string_dtype )
731
747
732
748
result = s .str .match ("a" , na = False )
733
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
749
+ expected_dtype = (
750
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
751
+ )
734
752
expected = Series ([True , False , False ], dtype = expected_dtype )
735
753
tm .assert_series_equal (result , expected )
736
754
737
755
result = s .str .match ("a" )
738
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
756
+ expected_dtype = (
757
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
758
+ )
739
759
expected = Series ([True , False , np .nan ], dtype = expected_dtype )
740
760
tm .assert_series_equal (result , expected )
741
761
742
762
743
763
def test_match_case_kwarg (any_string_dtype ):
744
764
values = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
745
765
result = values .str .match ("ab" , case = False )
746
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
766
+ expected_dtype = (
767
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
768
+ )
747
769
expected = Series ([True , True , True , True ], dtype = expected_dtype )
748
770
tm .assert_series_equal (result , expected )
749
771
@@ -759,7 +781,9 @@ def test_fullmatch(any_string_dtype):
759
781
["fooBAD__barBAD" , "BAD_BADleroybrown" , np .nan , "foo" ], dtype = any_string_dtype
760
782
)
761
783
result = ser .str .fullmatch (".*BAD[_]+.*BAD" )
762
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
784
+ expected_dtype = (
785
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
786
+ )
763
787
expected = Series ([True , False , np .nan , False ], dtype = expected_dtype )
764
788
tm .assert_series_equal (result , expected )
765
789
@@ -768,7 +792,9 @@ def test_fullmatch_dollar_literal(any_string_dtype):
768
792
# GH 56652
769
793
ser = Series (["foo" , "foo$foo" , np .nan , "foo$" ], dtype = any_string_dtype )
770
794
result = ser .str .fullmatch ("foo\\$" )
771
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
795
+ expected_dtype = (
796
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
797
+ )
772
798
expected = Series ([False , False , np .nan , True ], dtype = expected_dtype )
773
799
tm .assert_series_equal (result , expected )
774
800
@@ -778,14 +804,18 @@ def test_fullmatch_na_kwarg(any_string_dtype):
778
804
["fooBAD__barBAD" , "BAD_BADleroybrown" , np .nan , "foo" ], dtype = any_string_dtype
779
805
)
780
806
result = ser .str .fullmatch (".*BAD[_]+.*BAD" , na = False )
781
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
807
+ expected_dtype = (
808
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
809
+ )
782
810
expected = Series ([True , False , False , False ], dtype = expected_dtype )
783
811
tm .assert_series_equal (result , expected )
784
812
785
813
786
814
def test_fullmatch_case_kwarg (any_string_dtype , performance_warning ):
787
815
ser = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
788
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
816
+ expected_dtype = (
817
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
818
+ )
789
819
790
820
expected = Series ([True , False , False , False ], dtype = expected_dtype )
791
821
@@ -859,7 +889,9 @@ def test_find(any_string_dtype):
859
889
ser = Series (
860
890
["ABCDEFG" , "BCDEFEF" , "DEFGHIJEF" , "EFGHEF" , "XXXX" ], dtype = any_string_dtype
861
891
)
862
- expected_dtype = np .int64 if any_string_dtype in object_pyarrow_numpy else "Int64"
892
+ expected_dtype = (
893
+ np .int64 if is_object_or_nan_string_dtype (any_string_dtype ) else "Int64"
894
+ )
863
895
864
896
result = ser .str .find ("EF" )
865
897
expected = Series ([4 , 3 , 1 , 0 , - 1 ], dtype = expected_dtype )
@@ -911,7 +943,9 @@ def test_find_nan(any_string_dtype):
911
943
ser = Series (
912
944
["ABCDEFG" , np .nan , "DEFGHIJEF" , np .nan , "XXXX" ], dtype = any_string_dtype
913
945
)
914
- expected_dtype = np .float64 if any_string_dtype in object_pyarrow_numpy else "Int64"
946
+ expected_dtype = (
947
+ np .float64 if is_object_or_nan_string_dtype (any_string_dtype ) else "Int64"
948
+ )
915
949
916
950
result = ser .str .find ("EF" )
917
951
expected = Series ([4 , np .nan , 1 , np .nan , - 1 ], dtype = expected_dtype )
0 commit comments