Skip to content

Commit 622cf5c

Browse files
andrewkittredge authored and gouthambs committed
str_extract should work for timeseries, bug 6348
1 parent 4df6669 commit 622cf5c

File tree

2 files changed

+22
-2
lines changed

2 files changed

+22
-2
lines changed

pandas/core/strings.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -451,11 +451,15 @@ def f(x):
451 451
else:
452 452
return empty_row
453 453
if regex.groups == 1:
454-
result = Series([f(val)[0] for val in arr], name=regex.groupindex.get(1))
454+
result = Series([f(val)[0] for val in arr],
455+
name=regex.groupindex.get(1),
456+
index=arr.index)
455 457
else:
456 458
names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
457 459
columns = [names.get(1 + i, i) for i in range(regex.groups)]
458-
result = DataFrame([f(val) for val in arr], columns=columns)
460+
result = DataFrame([f(val) for val in arr],
461+
columns=columns,
462+
index=arr.index)
459 463
return result
460 464

461 465

pandas/tests/test_strings.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -547,6 +547,22 @@ def test_extract(self):
547 547
result = Series(['A1', 'B2', 'C']).str.extract('(?P<letter>[ABC])(?P<number>[123])?')
548 548
exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=['letter', 'number'])
549 549
tm.assert_frame_equal(result, exp)
550+
551+
def check_index(index):
552+
data = ['A1', 'B2', 'C']
553+
index = index[:len(data)]
554+
result = Series(data, index=index).str.extract('(\d)')
555+
exp = Series(['1', '2', NA], index=index)
556+
tm.assert_series_equal(result, exp)
557+
558+
result = Series(data, index=index).str.extract('(?P<letter>\D)(?P<number>\d)?')
559+
exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=['letter', 'number'], index=index)
560+
tm.assert_frame_equal(result, exp)
561+
562+
for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex,
563+
tm.makeDateIndex, tm.makePeriodIndex ]:
564+
check_index(index())
565+
550 566

551 567
def test_get_dummies(self):
552 568
s = Series(['a|b', 'a|c', np.nan])

0 commit comments

Comments
 (0)