Skip to content

Commit de51d33

Browse files
String dtype: remove fallback Performance warnings for string methods (#59760)
1 parent 50ac190 commit de51d33

File tree

6 files changed

+27
-117
lines changed

6 files changed

+27
-117
lines changed

pandas/core/arrays/arrow/_arrow_utils.py

-19
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,8 @@
11
from __future__ import annotations
22

3-
import warnings
4-
53
import numpy as np
64
import pyarrow
75

8-
from pandas._config.config import get_option
9-
10-
from pandas.errors import PerformanceWarning
11-
from pandas.util._exceptions import find_stack_level
12-
13-
14-
def fallback_performancewarning(version: str | None = None) -> None:
15-
"""
16-
Raise a PerformanceWarning for falling back to ExtensionArray's
17-
non-pyarrow method
18-
"""
19-
if get_option("performance_warnings"):
20-
msg = "Falling back on a non-pyarrow code path which may decrease performance."
21-
if version is not None:
22-
msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
23-
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
24-
256

267
def pyarrow_array_to_numpy_and_mask(
278
arr, dtype: np.dtype

pandas/core/arrays/string_arrow.py

-8
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010

1111
import numpy as np
1212

13-
from pandas._config.config import get_option
14-
1513
from pandas._libs import (
1614
lib,
1715
missing as libmissing,
@@ -43,8 +41,6 @@
4341
import pyarrow as pa
4442
import pyarrow.compute as pc
4543

46-
from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning
47-
4844

4945
if TYPE_CHECKING:
5046
from collections.abc import (
@@ -300,8 +296,6 @@ def _str_contains(
300296
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
301297
):
302298
if flags:
303-
if get_option("mode.performance_warnings"):
304-
fallback_performancewarning()
305299
return super()._str_contains(pat, case, flags, na, regex)
306300

307301
if not isna(na):
@@ -327,8 +321,6 @@ def _str_replace(
327321
regex: bool = True,
328322
):
329323
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
330-
if get_option("mode.performance_warnings"):
331-
fallback_performancewarning()
332324
return super()._str_replace(pat, repl, n, case, flags, regex)
333325

334326
return ArrowExtensionArray._str_replace(self, pat, repl, n, case, flags, regex)

pandas/tests/extension/test_string.py

-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,6 @@ def test_compare_scalar(self, data, comparison_op):
209209
ser = pd.Series(data)
210210
self._compare_other(ser, data, comparison_op, "abc")
211211

212-
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
213212
def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
214213
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
215214

pandas/tests/indexes/test_setops.py

-12
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,6 @@ def test_intersection_base(self, index):
246246
with pytest.raises(TypeError, match=msg):
247247
first.intersection([1, 2, 3])
248248

249-
@pytest.mark.filterwarnings(
250-
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
251-
)
252249
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
253250
def test_union_base(self, index):
254251
index = index.unique()
@@ -276,9 +273,6 @@ def test_union_base(self, index):
276273
first.union([1, 2, 3])
277274

278275
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
279-
@pytest.mark.filterwarnings(
280-
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
281-
)
282276
def test_difference_base(self, sort, index):
283277
first = index[2:]
284278
second = index[:4]
@@ -305,9 +299,6 @@ def test_difference_base(self, sort, index):
305299
first.difference([1, 2, 3], sort)
306300

307301
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
308-
@pytest.mark.filterwarnings(
309-
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
310-
)
311302
def test_symmetric_difference(self, index):
312303
if isinstance(index, CategoricalIndex):
313304
pytest.skip(f"Not relevant for {type(index).__name__}")
@@ -529,9 +520,6 @@ def test_intersection_difference_match_empty(self, index, sort):
529520

530521

531522
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
532-
@pytest.mark.filterwarnings(
533-
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
534-
)
535523
@pytest.mark.parametrize(
536524
"method", ["intersection", "union", "difference", "symmetric_difference"]
537525
)

pandas/tests/strings/test_find_replace.py

+27-76
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
# --------------------------------------------------------------------------------------
2222

2323

24-
def using_pyarrow(dtype):
25-
return dtype == "string" and dtype.storage == "pyarrow"
26-
27-
2824
def test_contains(any_string_dtype):
2925
values = np.array(
3026
["foo", np.nan, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_
@@ -458,13 +454,10 @@ def test_replace_mixed_object():
458454
tm.assert_series_equal(result, expected)
459455

460456

461-
def test_replace_unicode(any_string_dtype, performance_warning):
457+
def test_replace_unicode(any_string_dtype):
462458
ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
463459
expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
464-
with tm.maybe_produces_warning(
465-
performance_warning, using_pyarrow(any_string_dtype)
466-
):
467-
result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
460+
result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
468461
tm.assert_series_equal(result, expected)
469462

470463

@@ -478,24 +471,21 @@ def test_replace_wrong_repl_type_raises(any_string_dtype, index_or_series, repl,
478471
obj.str.replace("a", repl)
479472

480473

481-
def test_replace_callable(any_string_dtype, performance_warning):
474+
def test_replace_callable(any_string_dtype):
482475
# GH 15055
483476
ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
484477

485478
# test with callable
486479
repl = lambda m: m.group(0).swapcase()
487-
with tm.maybe_produces_warning(
488-
performance_warning, using_pyarrow(any_string_dtype)
489-
):
490-
result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
480+
result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
491481
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
492482
tm.assert_series_equal(result, expected)
493483

494484

495485
@pytest.mark.parametrize(
496486
"repl", [lambda: None, lambda m, x: None, lambda m, x, y=None: None]
497487
)
498-
def test_replace_callable_raises(any_string_dtype, performance_warning, repl):
488+
def test_replace_callable_raises(any_string_dtype, repl):
499489
# GH 15055
500490
values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
501491

@@ -504,43 +494,31 @@ def test_replace_callable_raises(any_string_dtype, performance_warning, repl):
504494
r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
505495
r"(?(3)required )positional arguments?"
506496
)
507-
if not using_pyarrow(any_string_dtype):
508-
performance_warning = False
509497
with pytest.raises(TypeError, match=msg):
510-
with tm.assert_produces_warning(performance_warning):
511-
values.str.replace("a", repl, regex=True)
498+
values.str.replace("a", repl, regex=True)
512499

513500

514-
def test_replace_callable_named_groups(any_string_dtype, performance_warning):
501+
def test_replace_callable_named_groups(any_string_dtype):
515502
# test regex named groups
516503
ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
517504
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
518505
repl = lambda m: m.group("middle").swapcase()
519-
with tm.maybe_produces_warning(
520-
performance_warning, using_pyarrow(any_string_dtype)
521-
):
522-
result = ser.str.replace(pat, repl, regex=True)
506+
result = ser.str.replace(pat, repl, regex=True)
523507
expected = Series(["bAR", np.nan], dtype=any_string_dtype)
524508
tm.assert_series_equal(result, expected)
525509

526510

527-
def test_replace_compiled_regex(any_string_dtype, performance_warning):
511+
def test_replace_compiled_regex(any_string_dtype):
528512
# GH 15446
529513
ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
530514

531515
# test with compiled regex
532516
pat = re.compile(r"BAD_*")
533-
with tm.maybe_produces_warning(
534-
performance_warning, using_pyarrow(any_string_dtype)
535-
):
536-
result = ser.str.replace(pat, "", regex=True)
517+
result = ser.str.replace(pat, "", regex=True)
537518
expected = Series(["foobar", np.nan], dtype=any_string_dtype)
538519
tm.assert_series_equal(result, expected)
539520

540-
with tm.maybe_produces_warning(
541-
performance_warning, using_pyarrow(any_string_dtype)
542-
):
543-
result = ser.str.replace(pat, "", n=1, regex=True)
521+
result = ser.str.replace(pat, "", n=1, regex=True)
544522
expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype)
545523
tm.assert_series_equal(result, expected)
546524

@@ -557,14 +535,11 @@ def test_replace_compiled_regex_mixed_object():
557535
tm.assert_series_equal(result, expected)
558536

559537

560-
def test_replace_compiled_regex_unicode(any_string_dtype, performance_warning):
538+
def test_replace_compiled_regex_unicode(any_string_dtype):
561539
ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
562540
expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
563541
pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
564-
with tm.maybe_produces_warning(
565-
performance_warning, using_pyarrow(any_string_dtype)
566-
):
567-
result = ser.str.replace(pat, ", ", regex=True)
542+
result = ser.str.replace(pat, ", ", regex=True)
568543
tm.assert_series_equal(result, expected)
569544

570545

@@ -586,15 +561,12 @@ def test_replace_compiled_regex_raises(any_string_dtype):
586561
ser.str.replace(pat, "", case=True, regex=True)
587562

588563

589-
def test_replace_compiled_regex_callable(any_string_dtype, performance_warning):
564+
def test_replace_compiled_regex_callable(any_string_dtype):
590565
# test with callable
591566
ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
592567
repl = lambda m: m.group(0).swapcase()
593568
pat = re.compile("[a-z][A-Z]{2}")
594-
with tm.maybe_produces_warning(
595-
performance_warning, using_pyarrow(any_string_dtype)
596-
):
597-
result = ser.str.replace(pat, repl, n=2, regex=True)
569+
result = ser.str.replace(pat, repl, n=2, regex=True)
598570
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
599571
tm.assert_series_equal(result, expected)
600572

@@ -626,7 +598,7 @@ def test_replace_literal_compiled_raises(any_string_dtype):
626598
ser.str.replace(pat, "", regex=False)
627599

628600

629-
def test_replace_moar(any_string_dtype, performance_warning):
601+
def test_replace_moar(any_string_dtype):
630602
# PR #1179
631603
ser = Series(
632604
["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"],
@@ -640,10 +612,7 @@ def test_replace_moar(any_string_dtype, performance_warning):
640612
)
641613
tm.assert_series_equal(result, expected)
642614

643-
with tm.maybe_produces_warning(
644-
performance_warning, using_pyarrow(any_string_dtype)
645-
):
646-
result = ser.str.replace("A", "YYY", case=False)
615+
result = ser.str.replace("A", "YYY", case=False)
647616
expected = Series(
648617
[
649618
"YYY",
@@ -661,10 +630,7 @@ def test_replace_moar(any_string_dtype, performance_warning):
661630
)
662631
tm.assert_series_equal(result, expected)
663632

664-
with tm.maybe_produces_warning(
665-
performance_warning, using_pyarrow(any_string_dtype)
666-
):
667-
result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
633+
result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
668634
expected = Series(
669635
[
670636
"A",
@@ -683,21 +649,15 @@ def test_replace_moar(any_string_dtype, performance_warning):
683649
tm.assert_series_equal(result, expected)
684650

685651

686-
def test_replace_not_case_sensitive_not_regex(any_string_dtype, performance_warning):
652+
def test_replace_not_case_sensitive_not_regex(any_string_dtype):
687653
# https://github.com/pandas-dev/pandas/issues/41602
688654
ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype)
689655

690-
with tm.maybe_produces_warning(
691-
performance_warning, using_pyarrow(any_string_dtype)
692-
):
693-
result = ser.str.replace("a", "c", case=False, regex=False)
656+
result = ser.str.replace("a", "c", case=False, regex=False)
694657
expected = Series(["c.", "c.", "cb", "cb", np.nan], dtype=any_string_dtype)
695658
tm.assert_series_equal(result, expected)
696659

697-
with tm.maybe_produces_warning(
698-
performance_warning, using_pyarrow(any_string_dtype)
699-
):
700-
result = ser.str.replace("a.", "c.", case=False, regex=False)
660+
result = ser.str.replace("a.", "c.", case=False, regex=False)
701661
expected = Series(["c.", "c.", "Ab", "ab", np.nan], dtype=any_string_dtype)
702662
tm.assert_series_equal(result, expected)
703663

@@ -853,7 +813,7 @@ def test_fullmatch_na_kwarg(any_string_dtype):
853813
tm.assert_series_equal(result, expected)
854814

855815

856-
def test_fullmatch_case_kwarg(any_string_dtype, performance_warning):
816+
def test_fullmatch_case_kwarg(any_string_dtype):
857817
ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
858818
expected_dtype = (
859819
np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
@@ -869,10 +829,7 @@ def test_fullmatch_case_kwarg(any_string_dtype, performance_warning):
869829
result = ser.str.fullmatch("ab", case=False)
870830
tm.assert_series_equal(result, expected)
871831

872-
with tm.maybe_produces_warning(
873-
performance_warning, using_pyarrow(any_string_dtype)
874-
):
875-
result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
832+
result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
876833
tm.assert_series_equal(result, expected)
877834

878835

@@ -1046,7 +1003,7 @@ def test_translate_mixed_object():
10461003
# --------------------------------------------------------------------------------------
10471004

10481005

1049-
def test_flags_kwarg(any_string_dtype, performance_warning):
1006+
def test_flags_kwarg(any_string_dtype):
10501007
data = {
10511008
"Dave": "dave@google.com",
10521009
"Steve": "steve@gmail.com",
@@ -1057,17 +1014,13 @@ def test_flags_kwarg(any_string_dtype, performance_warning):
10571014

10581015
pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})"
10591016

1060-
use_pyarrow = using_pyarrow(any_string_dtype)
1061-
10621017
result = data.str.extract(pat, flags=re.IGNORECASE, expand=True)
10631018
assert result.iloc[0].tolist() == ["dave", "google", "com"]
10641019

1065-
with tm.maybe_produces_warning(performance_warning, use_pyarrow):
1066-
result = data.str.match(pat, flags=re.IGNORECASE)
1020+
result = data.str.match(pat, flags=re.IGNORECASE)
10671021
assert result.iloc[0]
10681022

1069-
with tm.maybe_produces_warning(performance_warning, use_pyarrow):
1070-
result = data.str.fullmatch(pat, flags=re.IGNORECASE)
1023+
result = data.str.fullmatch(pat, flags=re.IGNORECASE)
10711024
assert result.iloc[0]
10721025

10731026
result = data.str.findall(pat, flags=re.IGNORECASE)
@@ -1077,8 +1030,6 @@ def test_flags_kwarg(any_string_dtype, performance_warning):
10771030
assert result.iloc[0] == 1
10781031

10791032
msg = "has match groups"
1080-
with tm.assert_produces_warning(
1081-
UserWarning, match=msg, raise_on_extra_warnings=not use_pyarrow
1082-
):
1033+
with tm.assert_produces_warning(UserWarning, match=msg):
10831034
result = data.str.contains(pat, flags=re.IGNORECASE)
10841035
assert result.iloc[0]

pandas/tests/strings/test_string_array.py

-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
)
1313

1414

15-
@pytest.mark.filterwarnings("ignore:Falling back")
1615
def test_string_array(nullable_string_dtype, any_string_method):
1716
method_name, args, kwargs = any_string_method
1817

0 commit comments

Comments
 (0)