Skip to content

Commit 2bbf515

Browse files
committed
REF: move implementations to mixin class
1 parent 0360c9a commit 2bbf515

File tree

3 files changed

+56
-82
lines changed

3 files changed

+56
-82
lines changed

pandas/core/arrays/_arrow_string_mixins.py

+45
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,37 @@ def _convert_int_result(self, result):
5050
def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
5151
raise NotImplementedError
5252

53+
def _str_len(self):
    """
    Compute the length of each string element.

    Applies the ``utf8_length`` compute kernel to ``self._pa_array`` and
    passes the result through ``self._convert_int_result``.

    Returns
    -------
    The value returned by ``self._convert_int_result``.
    """
    # NOTE(review): presumably ``_convert_int_result`` lets each concrete
    # array class pick its own integer result container -- confirm against
    # the subclasses.
    lengths = pc.utf8_length(self._pa_array)
    return self._convert_int_result(lengths)
56+
57+
def _str_lower(self) -> Self:
    """
    Lowercase every string element.

    Returns
    -------
    Self
        A new array of the same class wrapping the ``utf8_lower`` result.
    """
    lowered = pc.utf8_lower(self._pa_array)
    return type(self)(lowered)
59+
60+
def _str_upper(self) -> Self:
    """
    Uppercase every string element.

    Returns
    -------
    Self
        A new array of the same class wrapping the ``utf8_upper`` result.
    """
    uppered = pc.utf8_upper(self._pa_array)
    return type(self)(uppered)
62+
63+
def _str_strip(self, to_strip=None) -> Self:
    """
    Trim characters from both ends of every string element.

    Parameters
    ----------
    to_strip : optional
        Passed as ``characters`` to ``utf8_trim``. When None,
        ``utf8_trim_whitespace`` is used instead.

    Returns
    -------
    Self
        A new array of the same class wrapping the trimmed result.
    """
    values = self._pa_array
    trimmed = (
        pc.utf8_trim_whitespace(values)
        if to_strip is None
        else pc.utf8_trim(values, characters=to_strip)
    )
    return type(self)(trimmed)
69+
70+
def _str_lstrip(self, to_strip=None) -> Self:
    """
    Trim characters from the left end of every string element.

    Parameters
    ----------
    to_strip : optional
        Passed as ``characters`` to ``utf8_ltrim``. When None,
        ``utf8_ltrim_whitespace`` is used instead.

    Returns
    -------
    Self
        A new array of the same class wrapping the trimmed result.
    """
    wrap = type(self)
    # Default case first: no explicit character set means whitespace.
    if to_strip is None:
        return wrap(pc.utf8_ltrim_whitespace(self._pa_array))
    return wrap(pc.utf8_ltrim(self._pa_array, characters=to_strip))
76+
77+
def _str_rstrip(self, to_strip=None) -> Self:
    """
    Trim characters from the right end of every string element.

    Parameters
    ----------
    to_strip : optional
        Passed as ``characters`` to ``utf8_rtrim``. When None,
        ``utf8_rtrim_whitespace`` is used instead.

    Returns
    -------
    Self
        A new array of the same class wrapping the trimmed result.
    """
    values = self._pa_array
    trimmed = (
        pc.utf8_rtrim_whitespace(values)
        if to_strip is None
        else pc.utf8_rtrim(values, characters=to_strip)
    )
    return type(self)(trimmed)
83+
5384
def _str_pad(
5485
self,
5586
width: int,
@@ -247,3 +278,17 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
247278
offset_result = pc.add(result, start_offset)
248279
result = pc.if_else(found, offset_result, -1)
249280
return self._convert_int_result(result)
281+
282+
def _str_match(
283+
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
284+
):
285+
if not pat.startswith("^"):
286+
pat = f"^{pat}"
287+
return self._str_contains(pat, case, flags, na, regex=True)
288+
289+
def _str_fullmatch(
290+
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
291+
):
292+
if not pat.endswith("$") or pat.endswith("\\$"):
293+
pat = f"{pat}$"
294+
return self._str_match(pat, case, flags, na)

pandas/core/arrays/arrow/array.py

+1-48
Original file line numberDiff line numberDiff line change
@@ -1998,7 +1998,7 @@ def _rank(
19981998
"""
19991999
See Series.rank.__doc__.
20002000
"""
2001-
return type(self)(
2001+
return self._convert_int_result(
20022002
self._rank_calc(
20032003
axis=axis,
20042004
method=method,
@@ -2322,9 +2322,6 @@ def _str_count(self, pat: str, flags: int = 0) -> Self:
23222322
raise NotImplementedError(f"count not implemented with {flags=}")
23232323
return type(self)(pc.count_substring_regex(self._pa_array, pat))
23242324

2325-
def _result_converter(self, result):
2326-
return type(self)(result)
2327-
23282325
def _str_replace(
23292326
self,
23302327
pat: str | re.Pattern,
@@ -2359,20 +2356,6 @@ def _str_repeat(self, repeats: int | Sequence[int]) -> Self:
23592356
)
23602357
return type(self)(pc.binary_repeat(self._pa_array, repeats))
23612358

2362-
def _str_match(
2363-
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
2364-
) -> Self:
2365-
if not pat.startswith("^"):
2366-
pat = f"^{pat}"
2367-
return self._str_contains(pat, case, flags, na, regex=True)
2368-
2369-
def _str_fullmatch(
2370-
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
2371-
) -> Self:
2372-
if not pat.endswith("$") or pat.endswith("\\$"):
2373-
pat = f"{pat}$"
2374-
return self._str_match(pat, case, flags, na)
2375-
23762359
def _str_join(self, sep: str) -> Self:
23772360
if pa.types.is_string(self._pa_array.type) or pa.types.is_large_string(
23782361
self._pa_array.type
@@ -2404,36 +2387,6 @@ def _str_slice(
24042387
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
24052388
)
24062389

2407-
def _str_len(self) -> Self:
2408-
return type(self)(pc.utf8_length(self._pa_array))
2409-
2410-
def _str_lower(self) -> Self:
2411-
return type(self)(pc.utf8_lower(self._pa_array))
2412-
2413-
def _str_upper(self) -> Self:
2414-
return type(self)(pc.utf8_upper(self._pa_array))
2415-
2416-
def _str_strip(self, to_strip=None) -> Self:
2417-
if to_strip is None:
2418-
result = pc.utf8_trim_whitespace(self._pa_array)
2419-
else:
2420-
result = pc.utf8_trim(self._pa_array, characters=to_strip)
2421-
return type(self)(result)
2422-
2423-
def _str_lstrip(self, to_strip=None) -> Self:
2424-
if to_strip is None:
2425-
result = pc.utf8_ltrim_whitespace(self._pa_array)
2426-
else:
2427-
result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
2428-
return type(self)(result)
2429-
2430-
def _str_rstrip(self, to_strip=None) -> Self:
2431-
if to_strip is None:
2432-
result = pc.utf8_rtrim_whitespace(self._pa_array)
2433-
else:
2434-
result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
2435-
return type(self)(result)
2436-
24372390
def _str_removeprefix(self, prefix: str):
24382391
if not pa_version_under13p0:
24392392
starts_with = pc.starts_with(self._pa_array, pattern=prefix)

pandas/core/arrays/string_arrow.py

+10-34
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454

5555
from pandas._typing import (
5656
ArrayLike,
57-
AxisInt,
5857
Dtype,
5958
Self,
6059
npt,
@@ -292,19 +291,22 @@ def astype(self, dtype, copy: bool = True):
292291
_str_startswith = ArrowStringArrayMixin._str_startswith
293292
_str_endswith = ArrowStringArrayMixin._str_endswith
294293
_str_pad = ArrowStringArrayMixin._str_pad
295-
_str_match = ArrowExtensionArray._str_match
296-
_str_fullmatch = ArrowExtensionArray._str_fullmatch
297-
_str_lower = ArrowExtensionArray._str_lower
298-
_str_upper = ArrowExtensionArray._str_upper
299-
_str_strip = ArrowExtensionArray._str_strip
300-
_str_lstrip = ArrowExtensionArray._str_lstrip
301-
_str_rstrip = ArrowExtensionArray._str_rstrip
294+
_str_match = ArrowStringArrayMixin._str_match
295+
_str_fullmatch = ArrowStringArrayMixin._str_fullmatch
296+
_str_lower = ArrowStringArrayMixin._str_lower
297+
_str_upper = ArrowStringArrayMixin._str_upper
298+
_str_strip = ArrowStringArrayMixin._str_strip
299+
_str_lstrip = ArrowStringArrayMixin._str_lstrip
300+
_str_rstrip = ArrowStringArrayMixin._str_rstrip
302301
_str_removesuffix = ArrowStringArrayMixin._str_removesuffix
303302
_str_get = ArrowStringArrayMixin._str_get
304303
_str_capitalize = ArrowStringArrayMixin._str_capitalize
305304
_str_title = ArrowStringArrayMixin._str_title
306305
_str_swapcase = ArrowStringArrayMixin._str_swapcase
307306
_str_slice_replace = ArrowStringArrayMixin._str_slice_replace
307+
_str_len = ArrowStringArrayMixin._str_len
308+
309+
_rank = ArrowExtensionArray._rank
308310

309311
def _str_contains(
310312
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
@@ -356,10 +358,6 @@ def _str_slice(
356358
return super()._str_slice(start, stop, step)
357359
return ArrowExtensionArray._str_slice(self, start=start, stop=stop, step=step)
358360

359-
def _str_len(self):
360-
result = pc.utf8_length(self._pa_array)
361-
return self._convert_int_result(result)
362-
363361
def _str_removeprefix(self, prefix: str):
364362
if not pa_version_under13p0:
365363
return ArrowExtensionArray._str_removeprefix(self, prefix)
@@ -421,28 +419,6 @@ def _reduce(
421419
else:
422420
return result
423421

424-
def _rank(
425-
self,
426-
*,
427-
axis: AxisInt = 0,
428-
method: str = "average",
429-
na_option: str = "keep",
430-
ascending: bool = True,
431-
pct: bool = False,
432-
):
433-
"""
434-
See Series.rank.__doc__.
435-
"""
436-
return self._convert_int_result(
437-
self._rank_calc(
438-
axis=axis,
439-
method=method,
440-
na_option=na_option,
441-
ascending=ascending,
442-
pct=pct,
443-
)
444-
)
445-
446422
def value_counts(self, dropna: bool = True) -> Series:
447423
result = super().value_counts(dropna=dropna)
448424
if self.dtype.na_value is np.nan:

0 commit comments

Comments
 (0)