Skip to content

[ArrowStringArray] PERF: use pyarrow.compute.replace_substring(_regex) if available #41590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 26, 2021
23 changes: 23 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from collections.abc import Callable # noqa: PDF001
import re
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -834,6 +835,28 @@ def _str_endswith(self, pat: str, na=None):
pat = re.escape(pat) + "$"
return self._str_contains(pat, na=na, regex=True)

def _str_replace(
    self,
    pat: str | re.Pattern,
    repl: str | Callable,
    n: int = -1,
    case: bool = True,
    flags: int = 0,
    regex: bool = True,
):
    """
    Replace occurrences of ``pat`` with ``repl`` in each string element.

    The pyarrow compute kernels are used when possible; otherwise the call
    is delegated to the parent class implementation.

    Parameters
    ----------
    pat : str or re.Pattern
        Substring or regular expression to search for. A compiled pattern
        always takes the fallback path.
    repl : str or callable
        Replacement text. A callable always takes the fallback path.
    n : int, default -1
        Maximum number of replacements per element. Any negative value
        means "replace all".
    case : bool, default True
        Case-insensitive matching (``False``) takes the fallback path.
    flags : int, default 0
        Any non-zero ``re`` flags take the fallback path.
    regex : bool, default True
        If True use ``pc.replace_substring_regex``, else
        ``pc.replace_substring``.

    Returns
    -------
    Same type as ``self``
        New array wrapping the replaced values (pyarrow path), or whatever
        the parent implementation returns (fallback path).
    """
    # The pyarrow kernels are only called on a new-enough pyarrow and only
    # for a plain string pattern/replacement with default case and flags;
    # everything else is handed to the parent implementation.
    if (
        pa_version_under4p0
        or isinstance(pat, re.Pattern)
        or callable(repl)
        or not case
        or flags
    ):
        return super()._str_replace(pat, repl, n, case, flags, regex)

    func = pc.replace_substring_regex if regex else pc.replace_substring
    # Collapse every negative ``n`` to -1, the value the default ``n``
    # already passes for "no limit", so that e.g. ``n=-2`` cannot be
    # interpreted differently by pyarrow than ``n=-1``.
    max_replacements = -1 if n < 0 else n
    result = func(
        self._data,
        pattern=pat,
        replacement=repl,
        max_replacements=max_replacements,
    )
    return type(self)(result)

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
):
Expand Down