Skip to content

Commit 8efa4ac

Browse files
committed
Merge branch 'str_extract_time_series_bug' of https://github.com/andrewkittredge/pandas into andrewkittredge-str_extract_time_series_bug
2 parents a9a46d6 + 7aeab9a commit 8efa4ac

File tree

3 files changed

+26
-2
lines changed

3 files changed

+26
-2
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ Bug Fixes
171171
- Perf issue in concatting with empty objects (:issue:`3259`)
172172
- Clarify sorting of ``sym_diff`` on ``Index``es with ``NaN``s (:issue:`6444`)
173173
- Regression in ``MultiIndex.from_product`` with a ``DatetimeIndex`` as input (:issue:`6439`)
174+
- Bug in ``str.extract`` when passed a non-default index (:issue:`6348`)
174175

175176
pandas 0.13.1
176177
-------------

pandas/core/strings.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -451,11 +451,15 @@ def f(x):
451451
else:
452452
return empty_row
453453
if regex.groups == 1:
454-
result = Series([f(val)[0] for val in arr], name=regex.groupindex.get(1))
454+
result = Series([f(val)[0] for val in arr],
455+
name=regex.groupindex.get(1),
456+
index=arr.index)
455457
else:
456458
names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
457459
columns = [names.get(1 + i, i) for i in range(regex.groups)]
458-
result = DataFrame([f(val) for val in arr], columns=columns)
460+
result = DataFrame([f(val) for val in arr],
461+
columns=columns,
462+
index=arr.index)
459463
return result
460464

461465

pandas/tests/test_strings.py

+19
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,25 @@ def test_extract(self):
548548
exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=['letter', 'number'])
549549
tm.assert_frame_equal(result, exp)
550550

551+
# GH6348
# not passing index to the extractor
def check_index(index):
    """Check that ``str.extract`` results are labelled with ``index``.

    ``index`` is first trimmed to the length of the sample data and used
    to build the input Series. Two calls are checked against expectations
    built on that same trimmed index:

    * a single unnamed group, compared to a Series;
    * two named groups, compared to a DataFrame whose columns are the
      group names.
    """
    data = ['A1', 'B2', 'C']
    index = index[:len(data)]

    # Raw strings keep ``\d`` / ``\D`` as regex escapes instead of
    # (invalid) Python string escapes.
    result = Series(data, index=index).str.extract(r'(\d)')
    exp = Series(['1', '2', NA], index=index)
    tm.assert_series_equal(result, exp)

    result = Series(data, index=index).str.extract(
        r'(?P<letter>\D)(?P<number>\d)?')
    exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]],
                    columns=['letter', 'number'], index=index)
    tm.assert_frame_equal(result, exp)

# Each entry is an index factory; call it to get a fresh index to test.
for make_index in [tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex,
                   tm.makeDateIndex, tm.makePeriodIndex]:
    check_index(make_index())
568+
569+
551570
def test_get_dummies(self):
552571
s = Series(['a|b', 'a|c', np.nan])
553572
result = s.str.get_dummies('|')

0 commit comments

Comments
 (0)