4
4
import numpy as np
5
5
import pytest
6
6
7
- from pandas ._config import using_string_dtype
8
-
9
- from pandas .compat import HAS_PYARROW
10
- from pandas .errors import PerformanceWarning
11
7
import pandas .util ._test_decorators as td
12
8
13
9
import pandas as pd
25
21
# --------------------------------------------------------------------------------------
26
22
27
23
28
- def using_pyarrow (dtype ):
29
- return dtype == "string" and dtype .storage == "pyarrow"
30
-
31
-
32
24
def test_contains (any_string_dtype ):
33
25
values = np .array (
34
26
["foo" , np .nan , "fooommm__foo" , "mmm_" , "foommm[_]+bar" ], dtype = np .object_
@@ -281,10 +273,13 @@ def test_contains_nan(any_string_dtype):
281
273
# --------------------------------------------------------------------------------------
282
274
283
275
284
- @pytest .mark .xfail (
285
- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)" , strict = False
286
- )
287
- def test_startswith_endswith_validate_na (any_string_dtype ):
276
+ def test_startswith_endswith_validate_na (request , any_string_dtype ):
277
+ if (
278
+ any_string_dtype == "string"
279
+ and any_string_dtype .na_value is np .nan
280
+ and any_string_dtype .storage == "python"
281
+ ):
282
+ request .applymarker (pytest .mark .xfail (reason = "TODO(infer_string)" ))
288
283
# GH#59615
289
284
ser = Series (
290
285
["om" , np .nan , "foo_nom" , "nom" , "bar_foo" , np .nan , "foo" ],
@@ -462,8 +457,7 @@ def test_replace_mixed_object():
462
457
def test_replace_unicode (any_string_dtype ):
463
458
ser = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
464
459
expected = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
465
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
466
- result = ser .str .replace (r"(?<=\w),(?=\w)" , ", " , flags = re .UNICODE , regex = True )
460
+ result = ser .str .replace (r"(?<=\w),(?=\w)" , ", " , flags = re .UNICODE , regex = True )
467
461
tm .assert_series_equal (result , expected )
468
462
469
463
@@ -483,8 +477,7 @@ def test_replace_callable(any_string_dtype):
483
477
484
478
# test with callable
485
479
repl = lambda m : m .group (0 ).swapcase ()
486
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
487
- result = ser .str .replace ("[a-z][A-Z]{2}" , repl , n = 2 , regex = True )
480
+ result = ser .str .replace ("[a-z][A-Z]{2}" , repl , n = 2 , regex = True )
488
481
expected = Series (["foObaD__baRbaD" , np .nan ], dtype = any_string_dtype )
489
482
tm .assert_series_equal (result , expected )
490
483
@@ -502,19 +495,15 @@ def test_replace_callable_raises(any_string_dtype, repl):
502
495
r"(?(3)required )positional arguments?"
503
496
)
504
497
with pytest .raises (TypeError , match = msg ):
505
- with tm .maybe_produces_warning (
506
- PerformanceWarning , using_pyarrow (any_string_dtype )
507
- ):
508
- values .str .replace ("a" , repl , regex = True )
498
+ values .str .replace ("a" , repl , regex = True )
509
499
510
500
511
501
def test_replace_callable_named_groups (any_string_dtype ):
512
502
# test regex named groups
513
503
ser = Series (["Foo Bar Baz" , np .nan ], dtype = any_string_dtype )
514
504
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
515
505
repl = lambda m : m .group ("middle" ).swapcase ()
516
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
517
- result = ser .str .replace (pat , repl , regex = True )
506
+ result = ser .str .replace (pat , repl , regex = True )
518
507
expected = Series (["bAR" , np .nan ], dtype = any_string_dtype )
519
508
tm .assert_series_equal (result , expected )
520
509
@@ -525,13 +514,11 @@ def test_replace_compiled_regex(any_string_dtype):
525
514
526
515
# test with compiled regex
527
516
pat = re .compile (r"BAD_*" )
528
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
529
- result = ser .str .replace (pat , "" , regex = True )
517
+ result = ser .str .replace (pat , "" , regex = True )
530
518
expected = Series (["foobar" , np .nan ], dtype = any_string_dtype )
531
519
tm .assert_series_equal (result , expected )
532
520
533
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
534
- result = ser .str .replace (pat , "" , n = 1 , regex = True )
521
+ result = ser .str .replace (pat , "" , n = 1 , regex = True )
535
522
expected = Series (["foobarBAD" , np .nan ], dtype = any_string_dtype )
536
523
tm .assert_series_equal (result , expected )
537
524
@@ -552,8 +539,7 @@ def test_replace_compiled_regex_unicode(any_string_dtype):
552
539
ser = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
553
540
expected = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
554
541
pat = re .compile (r"(?<=\w),(?=\w)" , flags = re .UNICODE )
555
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
556
- result = ser .str .replace (pat , ", " , regex = True )
542
+ result = ser .str .replace (pat , ", " , regex = True )
557
543
tm .assert_series_equal (result , expected )
558
544
559
545
@@ -580,8 +566,7 @@ def test_replace_compiled_regex_callable(any_string_dtype):
580
566
ser = Series (["fooBAD__barBAD" , np .nan ], dtype = any_string_dtype )
581
567
repl = lambda m : m .group (0 ).swapcase ()
582
568
pat = re .compile ("[a-z][A-Z]{2}" )
583
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
584
- result = ser .str .replace (pat , repl , n = 2 , regex = True )
569
+ result = ser .str .replace (pat , repl , n = 2 , regex = True )
585
570
expected = Series (["foObaD__baRbaD" , np .nan ], dtype = any_string_dtype )
586
571
tm .assert_series_equal (result , expected )
587
572
@@ -629,8 +614,7 @@ def test_replace_moar(any_string_dtype):
629
614
)
630
615
tm .assert_series_equal (result , expected )
631
616
632
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
633
- result = ser .str .replace ("A" , "YYY" , case = False )
617
+ result = ser .str .replace ("A" , "YYY" , case = False )
634
618
expected = Series (
635
619
[
636
620
"YYY" ,
@@ -648,8 +632,7 @@ def test_replace_moar(any_string_dtype):
648
632
)
649
633
tm .assert_series_equal (result , expected )
650
634
651
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
652
- result = ser .str .replace ("^.a|dog" , "XX-XX " , case = False , regex = True )
635
+ result = ser .str .replace ("^.a|dog" , "XX-XX " , case = False , regex = True )
653
636
expected = Series (
654
637
[
655
638
"A" ,
@@ -672,13 +655,11 @@ def test_replace_not_case_sensitive_not_regex(any_string_dtype):
672
655
# https://github.com/pandas-dev/pandas/issues/41602
673
656
ser = Series (["A." , "a." , "Ab" , "ab" , np .nan ], dtype = any_string_dtype )
674
657
675
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
676
- result = ser .str .replace ("a" , "c" , case = False , regex = False )
658
+ result = ser .str .replace ("a" , "c" , case = False , regex = False )
677
659
expected = Series (["c." , "c." , "cb" , "cb" , np .nan ], dtype = any_string_dtype )
678
660
tm .assert_series_equal (result , expected )
679
661
680
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
681
- result = ser .str .replace ("a." , "c." , case = False , regex = False )
662
+ result = ser .str .replace ("a." , "c." , case = False , regex = False )
682
663
expected = Series (["c." , "c." , "Ab" , "ab" , np .nan ], dtype = any_string_dtype )
683
664
tm .assert_series_equal (result , expected )
684
665
@@ -850,8 +831,7 @@ def test_fullmatch_case_kwarg(any_string_dtype):
850
831
result = ser .str .fullmatch ("ab" , case = False )
851
832
tm .assert_series_equal (result , expected )
852
833
853
- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
854
- result = ser .str .fullmatch ("ab" , flags = re .IGNORECASE )
834
+ result = ser .str .fullmatch ("ab" , flags = re .IGNORECASE )
855
835
tm .assert_series_equal (result , expected )
856
836
857
837
@@ -1036,17 +1016,13 @@ def test_flags_kwarg(any_string_dtype):
1036
1016
1037
1017
pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})"
1038
1018
1039
- use_pyarrow = using_pyarrow (any_string_dtype )
1040
-
1041
1019
result = data .str .extract (pat , flags = re .IGNORECASE , expand = True )
1042
1020
assert result .iloc [0 ].tolist () == ["dave" , "google" , "com" ]
1043
1021
1044
- with tm .maybe_produces_warning (PerformanceWarning , use_pyarrow ):
1045
- result = data .str .match (pat , flags = re .IGNORECASE )
1022
+ result = data .str .match (pat , flags = re .IGNORECASE )
1046
1023
assert result .iloc [0 ]
1047
1024
1048
- with tm .maybe_produces_warning (PerformanceWarning , use_pyarrow ):
1049
- result = data .str .fullmatch (pat , flags = re .IGNORECASE )
1025
+ result = data .str .fullmatch (pat , flags = re .IGNORECASE )
1050
1026
assert result .iloc [0 ]
1051
1027
1052
1028
result = data .str .findall (pat , flags = re .IGNORECASE )
@@ -1056,8 +1032,6 @@ def test_flags_kwarg(any_string_dtype):
1056
1032
assert result .iloc [0 ] == 1
1057
1033
1058
1034
msg = "has match groups"
1059
- with tm .assert_produces_warning (
1060
- UserWarning , match = msg , raise_on_extra_warnings = not use_pyarrow
1061
- ):
1035
+ with tm .assert_produces_warning (UserWarning , match = msg ):
1062
1036
result = data .str .contains (pat , flags = re .IGNORECASE )
1063
1037
assert result .iloc [0 ]
0 commit comments