keep fixing

MarcoGorelli · MarcoGorelli · commit 005759c08f31 · 2025-03-08T11:27:46.000Z
diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi
@@ -272,6 +272,7 @@ class Index(IndexOpsMixin[S1]):
         Index[int],
         Index[bytes],
         Index[str],
+        Index[type[object]],
     ]: ...
     def is_(self, other) -> bool: ...
     def __len__(self) -> int: ...
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
@@ -1164,6 +1164,7 @@ class Series(IndexOpsMixin[S1], NDFrame):
         Series[int],
         Series[bytes],
         Series[str],
+        Series[type[object]],
     ]: ...
     @property
     def dt(self) -> CombinedDatetimelikeProperties: ...
diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi
@@ -43,8 +43,12 @@ _TI = TypeVar("_TI", bound=Series[int] | Index[int])
 _TE = TypeVar("_TE", bound=Series[bytes] | Index[bytes])
 # The _TD type is what is used for the result of str.encode
 _TD = TypeVar("_TD", bound=Series[str] | Index[str])
+# The _TO type is used for the result of str.partition/rpartition(expand=False)
+_TO = TypeVar("_TO", bound=Series[type[object]] | Index[type[object]])
 
-class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2, _TI, _TE, _TD]):
+class StringMethods(
+    NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2, _TI, _TE, _TD, _TO]
+):
     def __init__(self, data: T) -> None: ...
     def __getitem__(self, key: slice | int) -> T: ...
     def __iter__(self) -> T: ...
@@ -101,23 +105,19 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2, _TI, _TE, _
     @overload
     def partition(self, sep: str, expand: Literal[True]) -> _TS: ...
     @overload
-    def partition(
-        self, sep: str, expand: Literal[False]
-    ) -> pd.Series[type[object]]: ...
+    def partition(self, sep: str, expand: Literal[False]) -> _TO: ...
     @overload
-    def partition(self, *, expand: Literal[False]) -> pd.Series[type[object]]: ...
+    def partition(self, *, expand: Literal[False]) -> _TO: ...
     @overload
     def rpartition(self, sep: str = ...) -> _TS: ...
     @overload
-    def rpartition(self, *, expand: Literal[True]) -> pd.DataFrame: ...
+    def rpartition(self, *, expand: Literal[True]) -> _TS: ...
     @overload
-    def rpartition(self, sep: str, expand: Literal[True]) -> pd.DataFrame: ...
+    def rpartition(self, sep: str, expand: Literal[True]) -> _TS: ...
     @overload
-    def rpartition(
-        self, sep: str, expand: Literal[False]
-    ) -> pd.Series[type[object]]: ...
+    def rpartition(self, sep: str, expand: Literal[False]) -> _TO: ...
     @overload
-    def rpartition(self, *, expand: Literal[False]) -> pd.Series[type[object]]: ...
+    def rpartition(self, *, expand: Literal[False]) -> _TO: ...
     def get(self, i: int) -> T: ...
     def join(self, sep: str) -> _TD: ...
     def contains(
@@ -180,7 +180,7 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2, _TI, _TE, _
     @overload
     def extract(
         self, pat: str, flags: int = ..., *, expand: Literal[True] = ...
-    ) -> _TS: ...
+    ) -> pd.DataFrame: ...
     @overload
     def extract(
         self, pat: str, flags: int, expand: Literal[False]
@@ -189,7 +189,7 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2, _TI, _TE, _
     def extract(
         self, pat: str, flags: int = ..., *, expand: Literal[False]
     ) -> Series[type[object]]: ...
-    def extractall(self, pat: str, flags: int = ...) -> _TS: ...
+    def extractall(self, pat: str, flags: int = ...) -> pd.DataFrame: ...
     def find(self, sub: str, start: int = ..., end: int | None = ...) -> _TI: ...
     def rfind(self, sub: str, start: int = ..., end: int | None = ...) -> _TI: ...
     def normalize(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> T: ...
diff --git a/tests/test_string_accessors.py b/tests/test_string_accessors.py
@@ -81,7 +81,7 @@ def test_string_accessors_type_preserving_index() -> None:
 
 def test_string_accessors_boolean_series():
     s = pd.Series(DATA)
-    _check = functools.partial(check, klass=pd.Series, dtype=bool)
+    _check = functools.partial(check, klass=pd.Series, dtype=np.bool_)
     _check(assert_type(s.str.startswith("a"), "pd.Series[bool]"))
     _check(
         assert_type(s.str.startswith(("a", "b")), "pd.Series[bool]"),
@@ -220,10 +220,82 @@ def test_string_accessors_expanding_series():
 def test_string_accessors_expanding_index():
     idx = pd.Index(["a1", "b2", "c3"])
     _check = functools.partial(check, klass=pd.MultiIndex)
-    _check(assert_type(idx.str.extract(r"([ab])?(\d)"), pd.MultiIndex))
-    _check(assert_type(idx.str.extractall(r"([ab])?(\d)"), pd.MultiIndex))
     _check(assert_type(idx.str.get_dummies(), pd.MultiIndex))
     _check(assert_type(idx.str.partition("p"), pd.MultiIndex))
     _check(assert_type(idx.str.rpartition("p"), pd.MultiIndex))
     _check(assert_type(idx.str.rsplit("a", expand=True), pd.MultiIndex))
     _check(assert_type(idx.str.split("a", expand=True), pd.MultiIndex))
+
+    # Unlike the methods above, extract/extractall on an Index return a DataFrame.
+    check(assert_type(idx.str.extractall(r"([ab])?(\d)"), pd.DataFrame), pd.DataFrame)
+    check(assert_type(idx.str.extract(r"([ab])?(\d)"), pd.DataFrame), pd.DataFrame)
+
+
+def test_series_overloads_partition():
+    s = pd.Series(
+        [
+            "ap;pl;ep",
+            "ban;an;ap",
+            "Che;rr;yp",
+            "DA;TEp",
+            "eGGp;LANT;p",
+            "12;3p",
+            "23.45p",
+        ]
+    )
+    check(assert_type(s.str.partition(sep=";"), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(s.str.partition(sep=";", expand=True), pd.DataFrame), pd.DataFrame
+    )
+    check(
+        assert_type(s.str.partition(sep=";", expand=False), "pd.Series[type[object]]"),
+        pd.Series,
+        object,
+    )
+
+    check(assert_type(s.str.rpartition(sep=";"), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(s.str.rpartition(sep=";", expand=True), pd.DataFrame), pd.DataFrame
+    )
+    check(
+        assert_type(s.str.rpartition(sep=";", expand=False), "pd.Series[type[object]]"),
+        pd.Series,
+        object,
+    )
+
+
+def test_index_overloads_partition():
+    idx = pd.Index(
+        [
+            "ap;pl;ep",
+            "ban;an;ap",
+            "Che;rr;yp",
+            "DA;TEp",
+            "eGGp;LANT;p",
+            "12;3p",
+            "23.45p",
+        ]
+    )
+    check(assert_type(idx.str.partition(sep=";"), pd.MultiIndex), pd.MultiIndex)
+    check(
+        assert_type(idx.str.partition(sep=";", expand=True), pd.MultiIndex),
+        pd.MultiIndex,
+    )
+    check(
+        assert_type(idx.str.partition(sep=";", expand=False), "pd.Index[type[object]]"),
+        pd.Index,
+        object,
+    )
+
+    check(assert_type(idx.str.rpartition(sep=";"), pd.MultiIndex), pd.MultiIndex)
+    check(
+        assert_type(idx.str.rpartition(sep=";", expand=True), pd.MultiIndex),
+        pd.MultiIndex,
+    )
+    check(
+        assert_type(
+            idx.str.rpartition(sep=";", expand=False), "pd.Index[type[object]]"
+        ),
+        pd.Index,
+        object,
+    )