14
14
)
15
15
from pandas .tests .strings import (
16
16
_convert_na_value ,
17
- object_pyarrow_numpy ,
17
+ is_object_or_nan_string_dtype ,
18
18
)
19
19
20
20
# --------------------------------------------------------------------------------------
@@ -34,7 +34,9 @@ def test_contains(any_string_dtype):
34
34
pat = "mmm[_]+"
35
35
36
36
result = values .str .contains (pat )
37
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
37
+ expected_dtype = (
38
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
39
+ )
38
40
expected = Series (
39
41
np .array ([False , np .nan , True , True , False ], dtype = np .object_ ),
40
42
dtype = expected_dtype ,
@@ -53,7 +55,9 @@ def test_contains(any_string_dtype):
53
55
dtype = any_string_dtype ,
54
56
)
55
57
result = values .str .contains (pat )
56
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
58
+ expected_dtype = (
59
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
60
+ )
57
61
expected = Series (np .array ([False , False , True , True ]), dtype = expected_dtype )
58
62
tm .assert_series_equal (result , expected )
59
63
@@ -80,14 +84,18 @@ def test_contains(any_string_dtype):
80
84
pat = "mmm[_]+"
81
85
82
86
result = values .str .contains (pat )
83
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
87
+ expected_dtype = (
88
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
89
+ )
84
90
expected = Series (
85
91
np .array ([False , np .nan , True , True ], dtype = np .object_ ), dtype = expected_dtype
86
92
)
87
93
tm .assert_series_equal (result , expected )
88
94
89
95
result = values .str .contains (pat , na = False )
90
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
96
+ expected_dtype = (
97
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
98
+ )
91
99
expected = Series (np .array ([False , False , True , True ]), dtype = expected_dtype )
92
100
tm .assert_series_equal (result , expected )
93
101
@@ -172,7 +180,9 @@ def test_contains_moar(any_string_dtype):
172
180
)
173
181
174
182
result = s .str .contains ("a" )
175
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
183
+ expected_dtype = (
184
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
185
+ )
176
186
expected = Series (
177
187
[False , False , False , True , True , False , np .nan , False , False , True ],
178
188
dtype = expected_dtype ,
@@ -213,7 +223,9 @@ def test_contains_nan(any_string_dtype):
213
223
s = Series ([np .nan , np .nan , np .nan ], dtype = any_string_dtype )
214
224
215
225
result = s .str .contains ("foo" , na = False )
216
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
226
+ expected_dtype = (
227
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
228
+ )
217
229
expected = Series ([False , False , False ], dtype = expected_dtype )
218
230
tm .assert_series_equal (result , expected )
219
231
@@ -231,7 +243,9 @@ def test_contains_nan(any_string_dtype):
231
243
tm .assert_series_equal (result , expected )
232
244
233
245
result = s .str .contains ("foo" )
234
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
246
+ expected_dtype = (
247
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
248
+ )
235
249
expected = Series ([np .nan , np .nan , np .nan ], dtype = expected_dtype )
236
250
tm .assert_series_equal (result , expected )
237
251
@@ -641,7 +655,9 @@ def test_replace_regex_single_character(regex, any_string_dtype):
641
655
642
656
def test_match (any_string_dtype ):
643
657
# New match behavior introduced in 0.13
644
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
658
+ expected_dtype = (
659
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
660
+ )
645
661
646
662
values = Series (["fooBAD__barBAD" , np .nan , "foo" ], dtype = any_string_dtype )
647
663
result = values .str .match (".*(BAD[_]+).*(BAD)" )
@@ -696,20 +712,26 @@ def test_match_na_kwarg(any_string_dtype):
696
712
s = Series (["a" , "b" , np .nan ], dtype = any_string_dtype )
697
713
698
714
result = s .str .match ("a" , na = False )
699
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
715
+ expected_dtype = (
716
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
717
+ )
700
718
expected = Series ([True , False , False ], dtype = expected_dtype )
701
719
tm .assert_series_equal (result , expected )
702
720
703
721
result = s .str .match ("a" )
704
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
722
+ expected_dtype = (
723
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
724
+ )
705
725
expected = Series ([True , False , np .nan ], dtype = expected_dtype )
706
726
tm .assert_series_equal (result , expected )
707
727
708
728
709
729
def test_match_case_kwarg (any_string_dtype ):
710
730
values = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
711
731
result = values .str .match ("ab" , case = False )
712
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
732
+ expected_dtype = (
733
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
734
+ )
713
735
expected = Series ([True , True , True , True ], dtype = expected_dtype )
714
736
tm .assert_series_equal (result , expected )
715
737
@@ -725,7 +747,9 @@ def test_fullmatch(any_string_dtype):
725
747
["fooBAD__barBAD" , "BAD_BADleroybrown" , np .nan , "foo" ], dtype = any_string_dtype
726
748
)
727
749
result = ser .str .fullmatch (".*BAD[_]+.*BAD" )
728
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
750
+ expected_dtype = (
751
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
752
+ )
729
753
expected = Series ([True , False , np .nan , False ], dtype = expected_dtype )
730
754
tm .assert_series_equal (result , expected )
731
755
@@ -734,7 +758,9 @@ def test_fullmatch_dollar_literal(any_string_dtype):
734
758
# GH 56652
735
759
ser = Series (["foo" , "foo$foo" , np .nan , "foo$" ], dtype = any_string_dtype )
736
760
result = ser .str .fullmatch ("foo\\ $" )
737
- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
761
+ expected_dtype = (
762
+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
763
+ )
738
764
expected = Series ([False , False , np .nan , True ], dtype = expected_dtype )
739
765
tm .assert_series_equal (result , expected )
740
766
@@ -744,14 +770,18 @@ def test_fullmatch_na_kwarg(any_string_dtype):
744
770
["fooBAD__barBAD" , "BAD_BADleroybrown" , np .nan , "foo" ], dtype = any_string_dtype
745
771
)
746
772
result = ser .str .fullmatch (".*BAD[_]+.*BAD" , na = False )
747
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
773
+ expected_dtype = (
774
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
775
+ )
748
776
expected = Series ([True , False , False , False ], dtype = expected_dtype )
749
777
tm .assert_series_equal (result , expected )
750
778
751
779
752
780
def test_fullmatch_case_kwarg (any_string_dtype ):
753
781
ser = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
754
- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
782
+ expected_dtype = (
783
+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
784
+ )
755
785
756
786
expected = Series ([True , False , False , False ], dtype = expected_dtype )
757
787
@@ -823,7 +853,9 @@ def test_find(any_string_dtype):
823
853
ser = Series (
824
854
["ABCDEFG" , "BCDEFEF" , "DEFGHIJEF" , "EFGHEF" , "XXXX" ], dtype = any_string_dtype
825
855
)
826
- expected_dtype = np .int64 if any_string_dtype in object_pyarrow_numpy else "Int64"
856
+ expected_dtype = (
857
+ np .int64 if is_object_or_nan_string_dtype (any_string_dtype ) else "Int64"
858
+ )
827
859
828
860
result = ser .str .find ("EF" )
829
861
expected = Series ([4 , 3 , 1 , 0 , - 1 ], dtype = expected_dtype )
@@ -875,7 +907,9 @@ def test_find_nan(any_string_dtype):
875
907
ser = Series (
876
908
["ABCDEFG" , np .nan , "DEFGHIJEF" , np .nan , "XXXX" ], dtype = any_string_dtype
877
909
)
878
- expected_dtype = np .float64 if any_string_dtype in object_pyarrow_numpy else "Int64"
910
+ expected_dtype = (
911
+ np .float64 if is_object_or_nan_string_dtype (any_string_dtype ) else "Int64"
912
+ )
879
913
880
914
result = ser .str .find ("EF" )
881
915
expected = Series ([4 , np .nan , 1 , np .nan , - 1 ], dtype = expected_dtype )
0 commit comments