Skip to content

Commit d0477b2

Browse files
jsnowacki authored and jreback committed
str.extractall with no match returns appropriate MultiIndex (#19075)
1 parent 856c92b commit d0477b2

File tree

3 files changed

+35
-15
lines changed

3 files changed

+35
-15
lines changed

doc/source/whatsnew/v0.23.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ Indexing
333333
- Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`)
334334
- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`)
335335
- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`)
336-
336+
- Bug in ``str.extractall`` where an empty :class:`Index` was returned instead of the appropriate :class:`MultiIndex` when there were no matches (:issue:`19034`)
337337

338338
I/O
339339
^^^

pandas/core/strings.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -794,12 +794,10 @@ def str_extractall(arr, pat, flags=0):
794794
result_key = tuple(subject_key + (match_i, ))
795795
index_list.append(result_key)
796796

797-
if 0 < len(index_list):
798-
from pandas import MultiIndex
799-
index = MultiIndex.from_tuples(
800-
index_list, names=arr.index.names + ["match"])
801-
else:
802-
index = None
797+
from pandas import MultiIndex
798+
index = MultiIndex.from_tuples(
799+
index_list, names=arr.index.names + ["match"])
800+
803801
result = arr._constructor_expanddim(match_list, index=index,
804802
columns=columns)
805803
return result

pandas/tests/test_strings.py

+30-8
Original file line numberDiff line numberDiff line change
@@ -1072,28 +1072,50 @@ def test_extractall_single_group_with_quantifier(self):
10721072
e = DataFrame(['ab', 'abc', 'd', 'cd'], i)
10731073
tm.assert_frame_equal(r, e)
10741074

1075-
def test_extractall_no_matches(self):
1076-
s = Series(['a3', 'b3', 'd4c2'], name='series_name')
1075+
@pytest.mark.parametrize('data, names', [
1076+
([], (None, )),
1077+
([], ('i1', )),
1078+
([], (None, 'i2')),
1079+
([], ('i1', 'i2')),
1080+
(['a3', 'b3', 'd4c2'], (None, )),
1081+
(['a3', 'b3', 'd4c2'], ('i1', 'i2')),
1082+
(['a3', 'b3', 'd4c2'], (None, 'i2')),
1083+
(['a3', 'b3', 'd4c2'], ('i1', 'i2')),
1084+
])
1085+
def test_extractall_no_matches(self, data, names):
1086+
# GH19075 extractall with no matches should return a valid MultiIndex
1087+
n = len(data)
1088+
if len(names) == 1:
1089+
i = Index(range(n), name=names[0])
1090+
else:
1091+
a = (tuple([i] * (n - 1)) for i in range(n))
1092+
i = MultiIndex.from_tuples(a, names=names)
1093+
s = Series(data, name='series_name', index=i, dtype='object')
1094+
ei = MultiIndex.from_tuples([], names=(names + ('match',)))
1095+
10771096
# one un-named group.
10781097
r = s.str.extractall('(z)')
1079-
e = DataFrame(columns=[0])
1098+
e = DataFrame(columns=[0], index=ei)
10801099
tm.assert_frame_equal(r, e)
1100+
10811101
# two un-named groups.
10821102
r = s.str.extractall('(z)(z)')
1083-
e = DataFrame(columns=[0, 1])
1103+
e = DataFrame(columns=[0, 1], index=ei)
10841104
tm.assert_frame_equal(r, e)
1105+
10851106
# one named group.
10861107
r = s.str.extractall('(?P<first>z)')
1087-
e = DataFrame(columns=["first"])
1108+
e = DataFrame(columns=["first"], index=ei)
10881109
tm.assert_frame_equal(r, e)
1110+
10891111
# two named groups.
10901112
r = s.str.extractall('(?P<first>z)(?P<second>z)')
1091-
e = DataFrame(columns=["first", "second"])
1113+
e = DataFrame(columns=["first", "second"], index=ei)
10921114
tm.assert_frame_equal(r, e)
1115+
10931116
# one named, one un-named.
10941117
r = s.str.extractall('(z)(?P<second>z)')
1095-
e = DataFrame(columns=[0,
1096-
"second"])
1118+
e = DataFrame(columns=[0, "second"], index=ei)
10971119
tm.assert_frame_equal(r, e)
10981120

10991121
def test_extractall_stringindex(self):

0 commit comments

Comments
 (0)