Skip to content

Commit 26a0d56

Browse files
String dtype: remove fallback Performance warnings for string methods (pandas-dev#59760)
1 parent 418f890 commit 26a0d56

File tree

6 files changed

+23
-83
lines changed

6 files changed

+23
-83
lines changed

pandas/core/arrays/arrow/_arrow_utils.py

-16
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,8 @@
11
from __future__ import annotations
22

3-
import warnings
4-
53
import numpy as np
64
import pyarrow
75

8-
from pandas.errors import PerformanceWarning
9-
from pandas.util._exceptions import find_stack_level
10-
11-
12-
def fallback_performancewarning(version: str | None = None) -> None:
13-
"""
14-
Raise a PerformanceWarning for falling back to ExtensionArray's
15-
non-pyarrow method
16-
"""
17-
msg = "Falling back on a non-pyarrow code path which may decrease performance."
18-
if version is not None:
19-
msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
20-
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
21-
226

237
def pyarrow_array_to_numpy_and_mask(
248
arr, dtype: np.dtype

pandas/core/arrays/string_arrow.py

-4
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,6 @@
4242
import pyarrow as pa
4343
import pyarrow.compute as pc
4444

45-
from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning
46-
4745

4846
if TYPE_CHECKING:
4947
from collections.abc import Sequence
@@ -301,7 +299,6 @@ def _str_contains(
301299
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
302300
):
303301
if flags:
304-
fallback_performancewarning()
305302
return super()._str_contains(pat, case, flags, na, regex)
306303

307304
if not isna(na):
@@ -327,7 +324,6 @@ def _str_replace(
327324
regex: bool = True,
328325
):
329326
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
330-
fallback_performancewarning()
331327
return super()._str_replace(pat, repl, n, case, flags, regex)
332328

333329
return ArrowExtensionArray._str_replace(self, pat, repl, n, case, flags, regex)

pandas/tests/extension/test_string.py

-1
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,6 @@ def test_compare_scalar(self, data, comparison_op):
212212
ser = pd.Series(data)
213213
self._compare_other(ser, data, comparison_op, "abc")
214214

215-
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
216215
def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
217216
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
218217

pandas/tests/indexes/test_setops.py

-12
Original file line number | Diff line number | Diff line change
@@ -240,9 +240,6 @@ def test_intersection_base(self, index):
240240
with pytest.raises(TypeError, match=msg):
241241
first.intersection([1, 2, 3])
242242

243-
@pytest.mark.filterwarnings(
244-
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
245-
)
246243
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
247244
def test_union_base(self, index):
248245
index = index.unique()
@@ -270,9 +267,6 @@ def test_union_base(self, index):
270267
first.union([1, 2, 3])
271268

272269
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
273-
@pytest.mark.filterwarnings(
274-
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
275-
)
276270
def test_difference_base(self, sort, index):
277271
first = index[2:]
278272
second = index[:4]
@@ -299,9 +293,6 @@ def test_difference_base(self, sort, index):
299293
first.difference([1, 2, 3], sort)
300294

301295
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
302-
@pytest.mark.filterwarnings(
303-
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
304-
)
305296
def test_symmetric_difference(self, index):
306297
if isinstance(index, CategoricalIndex):
307298
pytest.skip(f"Not relevant for {type(index).__name__}")
@@ -523,9 +514,6 @@ def test_intersection_difference_match_empty(self, index, sort):
523514

524515

525516
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
526-
@pytest.mark.filterwarnings(
527-
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
528-
)
529517
@pytest.mark.parametrize(
530518
"method", ["intersection", "union", "difference", "symmetric_difference"]
531519
)

pandas/tests/strings/test_find_replace.py

+23 -49
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._config import using_string_dtype
8-
9-
from pandas.compat import HAS_PYARROW
10-
from pandas.errors import PerformanceWarning
117
import pandas.util._test_decorators as td
128

139
import pandas as pd
@@ -25,10 +21,6 @@
2521
# --------------------------------------------------------------------------------------
2622

2723

28-
def using_pyarrow(dtype):
29-
return dtype == "string" and dtype.storage == "pyarrow"
30-
31-
3224
def test_contains(any_string_dtype):
3325
values = np.array(
3426
["foo", np.nan, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_
@@ -281,10 +273,13 @@ def test_contains_nan(any_string_dtype):
281273
# --------------------------------------------------------------------------------------
282274

283275

284-
@pytest.mark.xfail(
285-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
286-
)
287-
def test_startswith_endswith_validate_na(any_string_dtype):
276+
def test_startswith_endswith_validate_na(request, any_string_dtype):
277+
if (
278+
any_string_dtype == "string"
279+
and any_string_dtype.na_value is np.nan
280+
and any_string_dtype.storage == "python"
281+
):
282+
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
288283
# GH#59615
289284
ser = Series(
290285
["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"],
@@ -462,8 +457,7 @@ def test_replace_mixed_object():
462457
def test_replace_unicode(any_string_dtype):
463458
ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
464459
expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
465-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
466-
result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
460+
result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
467461
tm.assert_series_equal(result, expected)
468462

469463

@@ -483,8 +477,7 @@ def test_replace_callable(any_string_dtype):
483477

484478
# test with callable
485479
repl = lambda m: m.group(0).swapcase()
486-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
487-
result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
480+
result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
488481
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
489482
tm.assert_series_equal(result, expected)
490483

@@ -502,19 +495,15 @@ def test_replace_callable_raises(any_string_dtype, repl):
502495
r"(?(3)required )positional arguments?"
503496
)
504497
with pytest.raises(TypeError, match=msg):
505-
with tm.maybe_produces_warning(
506-
PerformanceWarning, using_pyarrow(any_string_dtype)
507-
):
508-
values.str.replace("a", repl, regex=True)
498+
values.str.replace("a", repl, regex=True)
509499

510500

511501
def test_replace_callable_named_groups(any_string_dtype):
512502
# test regex named groups
513503
ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
514504
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
515505
repl = lambda m: m.group("middle").swapcase()
516-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
517-
result = ser.str.replace(pat, repl, regex=True)
506+
result = ser.str.replace(pat, repl, regex=True)
518507
expected = Series(["bAR", np.nan], dtype=any_string_dtype)
519508
tm.assert_series_equal(result, expected)
520509

@@ -525,13 +514,11 @@ def test_replace_compiled_regex(any_string_dtype):
525514

526515
# test with compiled regex
527516
pat = re.compile(r"BAD_*")
528-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
529-
result = ser.str.replace(pat, "", regex=True)
517+
result = ser.str.replace(pat, "", regex=True)
530518
expected = Series(["foobar", np.nan], dtype=any_string_dtype)
531519
tm.assert_series_equal(result, expected)
532520

533-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
534-
result = ser.str.replace(pat, "", n=1, regex=True)
521+
result = ser.str.replace(pat, "", n=1, regex=True)
535522
expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype)
536523
tm.assert_series_equal(result, expected)
537524

@@ -552,8 +539,7 @@ def test_replace_compiled_regex_unicode(any_string_dtype):
552539
ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
553540
expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
554541
pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
555-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
556-
result = ser.str.replace(pat, ", ", regex=True)
542+
result = ser.str.replace(pat, ", ", regex=True)
557543
tm.assert_series_equal(result, expected)
558544

559545

@@ -580,8 +566,7 @@ def test_replace_compiled_regex_callable(any_string_dtype):
580566
ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
581567
repl = lambda m: m.group(0).swapcase()
582568
pat = re.compile("[a-z][A-Z]{2}")
583-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
584-
result = ser.str.replace(pat, repl, n=2, regex=True)
569+
result = ser.str.replace(pat, repl, n=2, regex=True)
585570
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
586571
tm.assert_series_equal(result, expected)
587572

@@ -629,8 +614,7 @@ def test_replace_moar(any_string_dtype):
629614
)
630615
tm.assert_series_equal(result, expected)
631616

632-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
633-
result = ser.str.replace("A", "YYY", case=False)
617+
result = ser.str.replace("A", "YYY", case=False)
634618
expected = Series(
635619
[
636620
"YYY",
@@ -648,8 +632,7 @@ def test_replace_moar(any_string_dtype):
648632
)
649633
tm.assert_series_equal(result, expected)
650634

651-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
652-
result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
635+
result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
653636
expected = Series(
654637
[
655638
"A",
@@ -672,13 +655,11 @@ def test_replace_not_case_sensitive_not_regex(any_string_dtype):
672655
# https://github.com/pandas-dev/pandas/issues/41602
673656
ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype)
674657

675-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
676-
result = ser.str.replace("a", "c", case=False, regex=False)
658+
result = ser.str.replace("a", "c", case=False, regex=False)
677659
expected = Series(["c.", "c.", "cb", "cb", np.nan], dtype=any_string_dtype)
678660
tm.assert_series_equal(result, expected)
679661

680-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
681-
result = ser.str.replace("a.", "c.", case=False, regex=False)
662+
result = ser.str.replace("a.", "c.", case=False, regex=False)
682663
expected = Series(["c.", "c.", "Ab", "ab", np.nan], dtype=any_string_dtype)
683664
tm.assert_series_equal(result, expected)
684665

@@ -850,8 +831,7 @@ def test_fullmatch_case_kwarg(any_string_dtype):
850831
result = ser.str.fullmatch("ab", case=False)
851832
tm.assert_series_equal(result, expected)
852833

853-
with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
854-
result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
834+
result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
855835
tm.assert_series_equal(result, expected)
856836

857837

@@ -1036,17 +1016,13 @@ def test_flags_kwarg(any_string_dtype):
10361016

10371017
pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})"
10381018

1039-
use_pyarrow = using_pyarrow(any_string_dtype)
1040-
10411019
result = data.str.extract(pat, flags=re.IGNORECASE, expand=True)
10421020
assert result.iloc[0].tolist() == ["dave", "google", "com"]
10431021

1044-
with tm.maybe_produces_warning(PerformanceWarning, use_pyarrow):
1045-
result = data.str.match(pat, flags=re.IGNORECASE)
1022+
result = data.str.match(pat, flags=re.IGNORECASE)
10461023
assert result.iloc[0]
10471024

1048-
with tm.maybe_produces_warning(PerformanceWarning, use_pyarrow):
1049-
result = data.str.fullmatch(pat, flags=re.IGNORECASE)
1025+
result = data.str.fullmatch(pat, flags=re.IGNORECASE)
10501026
assert result.iloc[0]
10511027

10521028
result = data.str.findall(pat, flags=re.IGNORECASE)
@@ -1056,8 +1032,6 @@ def test_flags_kwarg(any_string_dtype):
10561032
assert result.iloc[0] == 1
10571033

10581034
msg = "has match groups"
1059-
with tm.assert_produces_warning(
1060-
UserWarning, match=msg, raise_on_extra_warnings=not use_pyarrow
1061-
):
1035+
with tm.assert_produces_warning(UserWarning, match=msg):
10621036
result = data.str.contains(pat, flags=re.IGNORECASE)
10631037
assert result.iloc[0]

pandas/tests/strings/test_string_array.py

-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
)
1313

1414

15-
@pytest.mark.filterwarnings("ignore:Falling back")
1615
def test_string_array(nullable_string_dtype, any_string_method):
1716
method_name, args, kwargs = any_string_method
1817

0 commit comments

Comments
 (0)