diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a77e93c4ab4be..70b4364e6be11 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -175,7 +175,7 @@ Performance improvements - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) -- +- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`?`) .. --------------------------------------------------------------------------- .. _whatsnew_300.bug_fixes: diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index fe68107a953bb..0dddfc4f4c4c1 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3557,7 +3557,7 @@ def _get_single_group_name(regex: re.Pattern) -> Hashable: return None -def _get_group_names(regex: re.Pattern) -> list[Hashable]: +def _get_group_names(regex: re.Pattern) -> list[Hashable] | range: """ Get named groups from compiled regex. 
@@ -3571,8 +3571,15 @@ def _get_group_names(regex: re.Pattern) -> list[Hashable]: ------- list of column labels """ + rng = range(regex.groups) names = {v: k for k, v in regex.groupindex.items()} - return [names.get(1 + i, i) for i in range(regex.groups)] + if not names: + return rng + result: list[Hashable] = [names.get(1 + i, i) for i in rng] + arr = np.array(result) + if arr.dtype.kind == "i" and lib.is_range_indexer(arr, len(arr)): + return rng + return result def str_extractall(arr, pat, flags: int = 0) -> DataFrame: diff --git a/pandas/tests/strings/test_extract.py b/pandas/tests/strings/test_extract.py index 7ebcbdc7a8533..1dc7dbabe85b0 100644 --- a/pandas/tests/strings/test_extract.py +++ b/pandas/tests/strings/test_extract.py @@ -563,13 +563,13 @@ def test_extractall_no_matches(data, names, any_string_dtype): # one un-named group. result = s.str.extractall("(z)") - expected = DataFrame(columns=[0], index=expected_index, dtype=any_string_dtype) - tm.assert_frame_equal(result, expected) + expected = DataFrame(columns=range(1), index=expected_index, dtype=any_string_dtype) + tm.assert_frame_equal(result, expected, check_column_type=True) # two un-named groups. result = s.str.extractall("(z)(z)") - expected = DataFrame(columns=[0, 1], index=expected_index, dtype=any_string_dtype) - tm.assert_frame_equal(result, expected) + expected = DataFrame(columns=range(2), index=expected_index, dtype=any_string_dtype) + tm.assert_frame_equal(result, expected, check_column_type=True) # one named group. result = s.str.extractall("(?Pz)")