@@ -439,41 +439,28 @@ def str_extract(arr, pat, flags=0):
439
439
440
440
"""
441
441
regex = re .compile (pat , flags = flags )
442
-
443
442
# just to be safe, check this
444
443
if regex .groups == 0 :
445
444
raise ValueError ("This pattern contains no groups to capture." )
446
- elif regex .groups == 1 :
447
- def f (x ):
448
- if not isinstance (x , compat .string_types ):
449
- return None
450
- m = regex .search (x )
451
- if m :
452
- return m .groups ()[0 ] # may be None
453
- else :
454
- return None
445
+ empty_row = [np .nan ]* regex .groups
446
+ def f (x ):
447
+ if not isinstance (x , compat .string_types ):
448
+ return empty_row
449
+ m = regex .search (x )
450
+ if m :
451
+ return [np .nan if item is None else item for item in m .groups ()]
452
+ else :
453
+ return empty_row
454
+ if regex .groups == 1 :
455
+ result = Series ([f (val )[0 ] for val in arr ], name = regex .groupindex .get (1 ))
455
456
else :
456
- empty_row = Series (regex .groups * [None ])
457
-
458
- def f (x ):
459
- if not isinstance (x , compat .string_types ):
460
- return empty_row
461
- m = regex .search (x )
462
- if m :
463
- return Series (list (m .groups ())) # may contain None
464
- else :
465
- return empty_row
466
- result = arr .apply (f )
467
- result .replace ({None : np .nan }, inplace = True )
468
- if regex .groups > 1 :
469
- result = DataFrame (result ) # Don't rely on the wrapper; name columns.
470
457
names = dict (zip (regex .groupindex .values (), regex .groupindex .keys ()))
471
- result .columns = [names .get (1 + i , i ) for i in range (regex .groups )]
472
- else :
473
- result .name = regex .groupindex .get (0 )
458
+ columns = [names .get (1 + i , i ) for i in range (regex .groups )]
459
+ result = DataFrame ([f (val ) for val in arr ], columns = columns )
474
460
return result
475
461
476
462
463
+
477
464
def str_join (arr , sep ):
478
465
"""
479
466
Join lists contained as elements in array, a la str.join
0 commit comments