Skip to content

Commit d405bf2

Browse files
GeraintDuck authored and jreback committed
BUG: Fix for Series.str.extractall (single group with quantifier)
closes #13382 closes #13397
1 parent d5bea25 commit d405bf2

File tree

3 files changed, with 17 additions and 0 deletions.

doc/source/whatsnew/v0.18.2.txt

+1
Original file line number | Diff line number | Diff line change
@@ -336,6 +336,7 @@ Bug Fixes
336336
- Bug in ``SeriesGroupBy.transform`` with datetime values and missing groups (:issue:`13191`)
337337

338338
- Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`)
339+
- Bug in ``Series.str.extractall()`` with single group and quantifier (:issue:`13382`)
339340

340341

341342
- Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`)

pandas/core/strings.py

+2
Original file line number | Diff line number | Diff line change
@@ -708,6 +708,8 @@ def str_extractall(arr, pat, flags=0):
708708
subject_key = (subject_key, )
709709

710710
for match_i, match_tuple in enumerate(regex.findall(subject)):
711+
if isinstance(match_tuple, compat.string_types):
712+
match_tuple = (match_tuple,)
711713
na_tuple = [np.NaN if group == "" else group
712714
for group in match_tuple]
713715
match_list.append(na_tuple)

pandas/tests/test_strings.py

+14
Original file line number | Diff line number | Diff line change
@@ -977,6 +977,20 @@ def test_extractall_single_group(self):
977977
e = DataFrame(['a', 'b', 'd', 'c'], i)
978978
tm.assert_frame_equal(r, e)
979979

980+
def test_extractall_single_group_with_quantifier(self):
981+
# extractall(one un-named group with quantifier) returns
982+
# DataFrame with one un-named column (GH13382).
983+
s = Series(['ab3', 'abc3', 'd4cd2'], name='series_name')
984+
r = s.str.extractall(r'([a-z]+)')
985+
i = MultiIndex.from_tuples([
986+
(0, 0),
987+
(1, 0),
988+
(2, 0),
989+
(2, 1),
990+
], names=(None, "match"))
991+
e = DataFrame(['ab', 'abc', 'd', 'cd'], i)
992+
tm.assert_frame_equal(r, e)
993+
980994
def test_extractall_no_matches(self):
981995
s = Series(['a3', 'b3', 'd4c2'], name='series_name')
982996
# one un-named group.

0 commit comments

Comments
 (0)