@@ -2385,6 +2385,11 @@ def extract(
2385
2385
2 NaN
2386
2386
dtype: object
2387
2387
"""
2388
+ from pandas import (
2389
+ DataFrame ,
2390
+ array as pd_array ,
2391
+ )
2392
+
2388
2393
if not isinstance (expand , bool ):
2389
2394
raise ValueError ("expand must be True or False" )
2390
2395
@@ -2396,7 +2401,40 @@ def extract(
2396
2401
raise ValueError ("only one regex group is supported with Index" )
2397
2402
2398
2403
# TODO: dispatch
2399
- return str_extract (self , pat , flags , expand = expand )
2404
+
2405
+ obj = self ._data
2406
+ result_dtype = _result_dtype (obj )
2407
+
2408
+ returns_df = regex .groups > 1 or expand
2409
+
2410
+ if returns_df :
2411
+ name = None
2412
+ columns = _get_group_names (regex )
2413
+
2414
+ if obj .array .size == 0 :
2415
+ result = DataFrame (columns = columns , dtype = result_dtype )
2416
+
2417
+ else :
2418
+ result_list = _str_extract (
2419
+ obj .array , pat , flags = flags , expand = returns_df
2420
+ )
2421
+
2422
+ result_index : Index | None
2423
+ if isinstance (obj , ABCSeries ):
2424
+ result_index = obj .index
2425
+ else :
2426
+ result_index = None
2427
+
2428
+ result = DataFrame (
2429
+ result_list , columns = columns , index = result_index , dtype = result_dtype
2430
+ )
2431
+
2432
+ else :
2433
+ name = _get_single_group_name (regex )
2434
+ result_arr = _str_extract (obj .array , pat , flags = flags , expand = returns_df )
2435
+ # not dispatching, so we have to reconstruct here.
2436
+ result = pd_array (result_arr , dtype = result_dtype )
2437
+ return self ._wrap_result (result , name = name )
2400
2438
2401
2439
@forbid_nonstring_types (["bytes" ])
2402
2440
def extractall (self , pat , flags = 0 ):
@@ -3110,45 +3148,6 @@ def f(x):
3110
3148
return np .array ([f (val )[0 ] for val in np .asarray (arr )], dtype = object )
3111
3149
3112
3150
3113
def str_extract(accessor: StringMethods, pat: str, flags: int = 0, expand: bool = True):
    """
    Extract the capture groups of ``pat`` from the data behind ``accessor``.

    Parameters
    ----------
    accessor : StringMethods
        Accessor whose ``_data`` attribute holds the object to search and
        whose ``_wrap_result`` method builds the final return value.
    pat : str
        Regular expression pattern; compiled here with ``flags``.
    flags : int, default 0
        Flags forwarded to ``re.compile`` and to ``_str_extract``.
    expand : bool, default True
        When true, or when the pattern has more than one capture group,
        a DataFrame is built; otherwise a one-dimensional array is built
        and named after the single group.

    Returns
    -------
    The value returned by ``accessor._wrap_result`` for the built result.
    """
    from pandas import (
        DataFrame,
        array as pd_array,
    )

    data = accessor._data
    dtype = _result_dtype(data)
    compiled = re.compile(pat, flags=flags)
    as_frame = compiled.groups > 1 or expand

    if not as_frame:
        # Single capture group with expand=False: one-dimensional result.
        group_name = _get_single_group_name(compiled)
        values = _str_extract(data.array, pat, flags=flags, expand=as_frame)
        # not dispatching, so we have to reconstruct here.
        return accessor._wrap_result(pd_array(values, dtype=dtype), name=group_name)

    column_names = _get_group_names(compiled)

    if data.array.size == 0:
        # Nothing to match against: build an empty frame with the group columns.
        frame = DataFrame(columns=column_names, dtype=dtype)
    else:
        rows = _str_extract(data.array, pat, flags=flags, expand=as_frame)
        # Only a Series contributes its index; anything else gets the default.
        frame_index: Index | None = (
            data.index if isinstance(data, ABCSeries) else None
        )
        frame = DataFrame(
            rows, columns=column_names, index=frame_index, dtype=dtype
        )

    return accessor._wrap_result(frame, name=None)
3150
-
3151
-
3152
3151
def str_extractall (arr , pat , flags = 0 ):
3153
3152
regex = re .compile (pat , flags = flags )
3154
3153
# the regex must contain capture groups.
0 commit comments