Skip to content

Commit cf11c16

Browse files
simonjayhawkins authored and TLouf committed
[ArrowStringArray] REF: str.extract - move code from function to accessor method (pandas-dev#41541)
1 parent 0f1ac99 commit cf11c16

File tree

1 file changed

+39
-40
lines changed

1 file changed

+39
-40
lines changed

pandas/core/strings/accessor.py

+39 -40
Original file line number | Diff line number | Diff line change
@@ -2385,6 +2385,11 @@ def extract(
23852385
2 NaN
23862386
dtype: object
23872387
"""
2388+
from pandas import (
2389+
DataFrame,
2390+
array as pd_array,
2391+
)
2392+
23882393
if not isinstance(expand, bool):
23892394
raise ValueError("expand must be True or False")
23902395

@@ -2396,7 +2401,40 @@ def extract(
23962401
raise ValueError("only one regex group is supported with Index")
23972402

23982403
# TODO: dispatch
2399-
return str_extract(self, pat, flags, expand=expand)
2404+
2405+
obj = self._data
2406+
result_dtype = _result_dtype(obj)
2407+
2408+
returns_df = regex.groups > 1 or expand
2409+
2410+
if returns_df:
2411+
name = None
2412+
columns = _get_group_names(regex)
2413+
2414+
if obj.array.size == 0:
2415+
result = DataFrame(columns=columns, dtype=result_dtype)
2416+
2417+
else:
2418+
result_list = _str_extract(
2419+
obj.array, pat, flags=flags, expand=returns_df
2420+
)
2421+
2422+
result_index: Index | None
2423+
if isinstance(obj, ABCSeries):
2424+
result_index = obj.index
2425+
else:
2426+
result_index = None
2427+
2428+
result = DataFrame(
2429+
result_list, columns=columns, index=result_index, dtype=result_dtype
2430+
)
2431+
2432+
else:
2433+
name = _get_single_group_name(regex)
2434+
result_arr = _str_extract(obj.array, pat, flags=flags, expand=returns_df)
2435+
# not dispatching, so we have to reconstruct here.
2436+
result = pd_array(result_arr, dtype=result_dtype)
2437+
return self._wrap_result(result, name=name)
24002438

24012439
@forbid_nonstring_types(["bytes"])
24022440
def extractall(self, pat, flags=0):
@@ -3110,45 +3148,6 @@ def f(x):
31103148
return np.array([f(val)[0] for val in np.asarray(arr)], dtype=object)
31113149

31123150

3113-
def str_extract(accessor: StringMethods, pat: str, flags: int = 0, expand: bool = True):
3114-
from pandas import (
3115-
DataFrame,
3116-
array as pd_array,
3117-
)
3118-
3119-
obj = accessor._data
3120-
result_dtype = _result_dtype(obj)
3121-
regex = re.compile(pat, flags=flags)
3122-
returns_df = regex.groups > 1 or expand
3123-
3124-
if returns_df:
3125-
name = None
3126-
columns = _get_group_names(regex)
3127-
3128-
if obj.array.size == 0:
3129-
result = DataFrame(columns=columns, dtype=result_dtype)
3130-
3131-
else:
3132-
result_list = _str_extract(obj.array, pat, flags=flags, expand=returns_df)
3133-
3134-
result_index: Index | None
3135-
if isinstance(obj, ABCSeries):
3136-
result_index = obj.index
3137-
else:
3138-
result_index = None
3139-
3140-
result = DataFrame(
3141-
result_list, columns=columns, index=result_index, dtype=result_dtype
3142-
)
3143-
3144-
else:
3145-
name = _get_single_group_name(regex)
3146-
result_arr = _str_extract(obj.array, pat, flags=flags, expand=returns_df)
3147-
# not dispatching, so we have to reconstruct here.
3148-
result = pd_array(result_arr, dtype=result_dtype)
3149-
return accessor._wrap_result(result, name=name)
3150-
3151-
31523151
def str_extractall(arr, pat, flags=0):
31533152
regex = re.compile(pat, flags=flags)
31543153
# the regex must contain capture groups.

0 commit comments

Comments
 (0)