From c4f0d960bc93473a6bcc1f777ee2f3be6c103a15 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Feb 2025 12:16:13 +0000 Subject: [PATCH 1/7] type dataframe.replace --- pandas-stubs/core/frame.pyi | 42 +++++++++++++++++++++++++++++++------ tests/test_frame.py | 33 +++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 37a2d13ca..447edde5c 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -799,23 +799,53 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def replace( self, - to_replace=..., - value: Scalar | NAType | Sequence | Mapping | Pattern | None = ..., + to_replace: ( + Scalar + | NAType + | Sequence[Scalar | Pattern] + | Mapping[Hashable, Scalar | Pattern] + | Pattern + | None + ) = ..., + value: ( + Scalar | NAType | Sequence[Scalar] | Mapping[Hashable, Scalar] | None + ) = ..., *, inplace: Literal[True], limit: int | None = ..., - regex=..., + regex: ( + bool + | str + | Pattern + | Sequence[str | Pattern] + | Mapping[Hashable, str | Pattern] + ) = ..., method: ReplaceMethod = ..., ) -> None: ... @overload def replace( self, - to_replace=..., - value: Scalar | NAType | Sequence | Mapping | Pattern | None = ..., + to_replace: ( + Scalar + | NAType + | Sequence[Scalar | Pattern] + | Mapping[Hashable, Scalar | Pattern] + | Pattern + | None + ) = ..., + value: ( + Scalar | NAType | Sequence[Scalar] | Mapping[Hashable, Scalar] | None + ) = ..., *, inplace: Literal[False] = ..., limit: int | None = ..., - regex=..., + regex: ( + bool + | str + | Pattern + | Sequence[str | Pattern] + | Mapping[Hashable, str | Pattern] + ) = ..., method: ReplaceMethod = ..., ) -> Self: ... def shift( diff --git a/tests/test_frame.py b/tests/test_frame.py index b74118b34..716d2cf4e 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -14,6 +14,7 @@ import io import itertools from pathlib import Path +import re import string import sys from typing import ( @@ -2570,6 +2571,38 @@ def test_types_replace() -> None: assert assert_type(df.replace(1, 2, inplace=True), None) is None +def test_dataframe_replace() -> None: + df = pd.DataFrame({"col1": ["a", "ab", "ba"]}) + pattern = re.compile(r"^a.*") + check(assert_type(df.replace("a", "x"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(pattern, "x"), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.replace({"col1": "a"}, {"col1": "x"}), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(df.replace({"col1": pattern}, {"col1": "x"}), pd.DataFrame), + pd.DataFrame, + ) + check(assert_type(df.replace(["a"], ["x"]), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace([pattern], ["x"]), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace("^a.*", "x", regex=True), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(value="x", regex="^a."), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(value="x", regex=["^a."]), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.replace(value="x", regex={"col1": "^a."}), pd.DataFrame), + pd.DataFrame, + ) + check(assert_type(df.replace(value="x", regex=pattern), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.replace(value="x", regex=[pattern]), pd.DataFrame), pd.DataFrame + ) + check( + assert_type(df.replace(value="x", regex={"col1": pattern}), pd.DataFrame), + pd.DataFrame, + ) + + def test_loop_dataframe() -> None: # GH 70 df = pd.DataFrame({"x": [1, 2, 3]}) From 3ffef4fb86ced5f26e6bf49e153e0d88910e4f3d Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 24 Feb 2025 09:52:59 +0000 Subject: [PATCH 2/7] the typing never stops --- pandas-stubs/core/frame.pyi | 9 +++---- pandas-stubs/core/series.pyi | 52 ++++++++++++++++++++++++++++-------- tests/test_frame.py | 6 +++++ tests/test_series.py | 34 +++++++++++++++++++++++ 4 files changed, 85 insertions(+), 16 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 447edde5c..b7b7d8ff3 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -113,7 +113,6 @@ from pandas._typing import ( RandomState, ReadBuffer, Renamer, - ReplaceMethod, Scalar, ScalarT, SequenceNotStr, @@ -804,6 +803,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): | NAType | Sequence[Scalar | Pattern] | Mapping[Hashable, Scalar | Pattern] + | Series[Any] | Pattern | None ) = ..., @@ -812,15 +812,14 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): ) = ..., *, inplace: Literal[True], - limit: int | None = ..., regex: ( bool | str | Pattern | Sequence[str | Pattern] | Mapping[Hashable, str | Pattern] + | Series[Any] ) = ..., - method: ReplaceMethod = ..., ) -> None: ... @overload def replace( @@ -830,6 +829,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): | NAType | Sequence[Scalar | Pattern] | Mapping[Hashable, Scalar | Pattern] + | Series[Any] | Pattern | None ) = ..., @@ -838,15 +838,14 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): ) = ..., *, inplace: Literal[False] = ..., - limit: int | None = ..., regex: ( bool | str | Pattern | Sequence[str | Pattern] | Mapping[Hashable, str | Pattern] + | Series[Any] ) = ..., - method: ReplaceMethod = ..., ) -> Self: ... def shift( self, diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index ecf65ab51..4799973cc 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -15,6 +15,7 @@ from datetime import ( timedelta, ) from pathlib import Path +from re import Pattern from typing import ( Any, ClassVar, @@ -141,7 +142,6 @@ from pandas._typing import ( QuantileInterpolation, RandomState, Renamer, - ReplaceMethod, Scalar, ScalarT, SequenceNotStr, @@ -1089,24 +1089,54 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def replace( self, - to_replace: _str | list | dict | Series[S1] | float | None = ..., - value: Scalar | NAType | dict | list | _str | None = ..., + to_replace: ( + Scalar + | NAType + | Sequence[Scalar | Pattern] + | Mapping[Scalar | Pattern, Scalar] + | Series[Any] + | Pattern + | None + ) = ..., + value: ( + Scalar | NAType | Sequence[Scalar] | Mapping[Scalar, Scalar] | None + ) = ..., *, - limit: int | None = ..., - regex=..., - method: ReplaceMethod = ..., + regex: ( + bool + | str + | Pattern + | Sequence[str | Pattern] + | Mapping[Scalar | Pattern, Scalar] + | Series[Any] + ) = ..., inplace: Literal[True], ) -> None: ... @overload def replace( self, - to_replace: _str | list | dict | Series[S1] | float | None = ..., - value: Scalar | NAType | dict | list | _str | None = ..., + to_replace: ( + Scalar + | NAType + | Sequence[Scalar | Pattern] + | Mapping[Scalar | Pattern, Scalar] + | Series[Any] + | Pattern + | None + ) = ..., + value: ( + Scalar | NAType | Sequence[Scalar] | Mapping[Scalar, Scalar] | None + ) = ..., *, inplace: Literal[False] = ..., - limit: int | None = ..., - regex=..., - method: ReplaceMethod = ..., + regex: ( + bool + | str + | Pattern + | Sequence[str | Pattern] + | Mapping[Scalar | Pattern, Scalar] + | Series[Any] + ) = ..., ) -> Series[S1]: ... def shift( self, diff --git a/tests/test_frame.py b/tests/test_frame.py index 716d2cf4e..f8d027302 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -2575,6 +2575,8 @@ def test_dataframe_replace() -> None: df = pd.DataFrame({"col1": ["a", "ab", "ba"]}) pattern = re.compile(r"^a.*") check(assert_type(df.replace("a", "x"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace({"a": "x"}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(pd.Series({"a": "x"})), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace(pattern, "x"), pd.DataFrame), pd.DataFrame) check( assert_type(df.replace({"col1": "a"}, {"col1": "x"}), pd.DataFrame), @@ -2601,6 +2603,10 @@ def test_dataframe_replace() -> None: assert_type(df.replace(value="x", regex={"col1": pattern}), pd.DataFrame), pd.DataFrame, ) + check(assert_type(df.replace(regex={"a": "x"}), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.replace(regex=pd.Series({"a": "x"})), pd.DataFrame), pd.DataFrame + ) def test_loop_dataframe() -> None: diff --git a/tests/test_series.py b/tests/test_series.py index 7d0537ed6..0c700fcbd 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1410,6 +1410,40 @@ def test_types_replace() -> None: assert assert_type(s.replace(1, 2, inplace=True), None) is None +def test_series_replace() -> None: + s: pd.Series[str] = pd.DataFrame({"col1": ["a", "ab", "ba"]})["col1"] + pattern = re.compile(r"^a.*") + check(assert_type(s.replace("a", "x"), "pd.Series[str]"), pd.Series) + check(assert_type(s.replace(pattern, "x"), "pd.Series[str]"), pd.Series) + check( + assert_type(s.replace({"a": "z"}), "pd.Series[str]"), + pd.Series, + ) + check( + assert_type(s.replace(pd.Series({"a": "z"})), "pd.Series[str]"), + pd.Series, + ) + check( + assert_type(s.replace({pattern: "z"}), "pd.Series[str]"), + pd.Series, + ) + check(assert_type(s.replace(["a"], ["x"]), "pd.Series[str]"), pd.Series) + check(assert_type(s.replace([pattern], ["x"]), "pd.Series[str]"), pd.Series) + check(assert_type(s.replace(r"^a.*", "x", regex=True), "pd.Series[str]"), pd.Series) + check(assert_type(s.replace(value="x", regex=r"^a.*"), "pd.Series[str]"), pd.Series) + check( + assert_type(s.replace(value="x", regex=[r"^a.*"]), "pd.Series[str]"), pd.Series + ) + check(assert_type(s.replace(value="x", regex=pattern), "pd.Series[str]"), pd.Series) + check( + assert_type(s.replace(value="x", regex=[pattern]), "pd.Series[str]"), pd.Series + ) + check(assert_type(s.replace(regex={"a": "x"}), "pd.Series[str]"), pd.Series) + check( + assert_type(s.replace(regex=pd.Series({"a": "x"})), "pd.Series[str]"), pd.Series + ) + + def test_cat_accessor() -> None: # GH 43 s: pd.Series[str] = pd.Series( From b04f5e6f0b4f369140fe27ee1998296e1d3c242b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 24 Feb 2025 09:57:35 +0000 Subject: [PATCH 3/7] remove unused ReplaceMethod --- pandas-stubs/_typing.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 1b31c8795..3e680384e 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -731,7 +731,6 @@ InterpolateOptions: TypeAlias = Literal[ "cubicspline", "from_derivatives", ] -ReplaceMethod: TypeAlias = Literal["pad", "ffill", "bfill"] SortKind: TypeAlias = Literal["quicksort", "mergesort", "heapsort", "stable"] NaPosition: TypeAlias = Literal["first", "last"] JoinHow: TypeAlias = Literal["left", "right", "outer", "inner"] From 5706029e8fc9def996cf2e2aabceda6fad278c49 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 24 Feb 2025 15:09:31 +0000 Subject: [PATCH 4/7] finish dataframe.replace typing --- pandas-stubs/core/frame.pyi | 60 ++++++++++++++++----- tests/test_frame.py | 102 +++++++++++++++++++++++++++++++----- 2 files changed, 138 insertions(+), 24 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index b7b7d8ff3..887a7a25a 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -802,23 +802,41 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): Scalar | NAType | Sequence[Scalar | Pattern] + | Mapping[Scalar | Pattern, Scalar] | Mapping[Hashable, Scalar | Pattern] + | Mapping[Hashable, Sequence[Scalar | Pattern]] + | Mapping[Hashable, Mapping[Scalar | Pattern, Scalar]] + | Mapping[Hashable, Series[Any]] | Series[Any] | Pattern | None ) = ..., value: ( - Scalar | NAType | Sequence[Scalar] | Mapping[Hashable, Scalar] | None + Scalar + | NAType + | Sequence[Scalar] + | Mapping[Scalar, Scalar] + | Mapping[Hashable, Scalar] + | Mapping[Hashable, Sequence[Scalar]] + | Mapping[Hashable, Mapping[Scalar, Scalar]] + | Mapping[Hashable, Series[Any]] + | Series[Any] + | None ) = ..., *, inplace: Literal[True], regex: ( - bool - | str - | Pattern - | Sequence[str | Pattern] - | Mapping[Hashable, str | Pattern] + Scalar + | NAType + | Sequence[Scalar | Pattern] + | Mapping[Scalar | Pattern, Scalar] + | Mapping[Hashable, Scalar | Pattern] + | Mapping[Hashable, Sequence[Scalar | Pattern]] + | Mapping[Hashable, Mapping[Scalar | Pattern, Scalar]] + | Mapping[Hashable, Series[Any]] | Series[Any] + | Pattern + | None ) = ..., ) -> None: ... @overload @@ -828,23 +846,41 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): Scalar | NAType | Sequence[Scalar | Pattern] + | Mapping[Scalar | Pattern, Scalar] | Mapping[Hashable, Scalar | Pattern] + | Mapping[Hashable, Sequence[Scalar | Pattern]] + | Mapping[Hashable, Mapping[Scalar | Pattern, Scalar]] + | Mapping[Hashable, Series[Any]] | Series[Any] | Pattern | None ) = ..., value: ( - Scalar | NAType | Sequence[Scalar] | Mapping[Hashable, Scalar] | None + Scalar + | NAType + | Sequence[Scalar] + | Mapping[Scalar, Scalar] + | Mapping[Hashable, Scalar] + | Mapping[Hashable, Sequence[Scalar]] + | Mapping[Hashable, Mapping[Scalar, Scalar]] + | Mapping[Hashable, Series[Any]] + | Series[Any] + | None ) = ..., *, inplace: Literal[False] = ..., regex: ( - bool - | str - | Pattern - | Sequence[str | Pattern] - | Mapping[Hashable, str | Pattern] + Scalar + | NAType + | Sequence[Scalar | Pattern] + | Mapping[Scalar | Pattern, Scalar] + | Mapping[Hashable, Scalar | Pattern] + | Mapping[Hashable, Sequence[Scalar | Pattern]] + | Mapping[Hashable, Mapping[Scalar | Pattern, Scalar]] + | Mapping[Hashable, Series[Any]] | Series[Any] + | Pattern + | None ) = ..., ) -> Self: ... def shift( diff --git a/tests/test_frame.py b/tests/test_frame.py index f8d027302..530b57f4a 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -2574,10 +2574,31 @@ def test_types_replace() -> None: def test_dataframe_replace() -> None: df = pd.DataFrame({"col1": ["a", "ab", "ba"]}) pattern = re.compile(r"^a.*") + # global scalar replacement check(assert_type(df.replace("a", "x"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(pattern, "x"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace("a", "x", regex=True), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(pattern, "x"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(regex="a", value="x"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(regex=pattern, value="x"), pd.DataFrame), pd.DataFrame) + # global sequence replacement + check(assert_type(df.replace(["a"], ["x"]), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace([pattern], ["x"]), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(regex=["a"], value=["x"]), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.replace(regex=[pattern], value=["x"]), pd.DataFrame), + pd.DataFrame, + ) + # global mapping check(assert_type(df.replace({"a": "x"}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace({pattern: "x"}), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace(pd.Series({"a": "x"})), pd.DataFrame), pd.DataFrame) - check(assert_type(df.replace(pattern, "x"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(regex={"a": "x"}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace(regex={pattern: "x"}), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.replace(regex=pd.Series({"a": "x"})), pd.DataFrame), pd.DataFrame + ) + # per-column scalar-scalar replacement check( assert_type(df.replace({"col1": "a"}, {"col1": "x"}), pd.DataFrame), pd.DataFrame, @@ -2586,26 +2607,83 @@ def test_dataframe_replace() -> None: assert_type(df.replace({"col1": pattern}, {"col1": "x"}), pd.DataFrame), pd.DataFrame, ) - check(assert_type(df.replace(["a"], ["x"]), pd.DataFrame), pd.DataFrame) - check(assert_type(df.replace([pattern], ["x"]), pd.DataFrame), pd.DataFrame) - check(assert_type(df.replace("^a.*", "x", regex=True), pd.DataFrame), pd.DataFrame) - check(assert_type(df.replace(value="x", regex="^a."), pd.DataFrame), pd.DataFrame) - check(assert_type(df.replace(value="x", regex=["^a."]), pd.DataFrame), pd.DataFrame) check( - assert_type(df.replace(value="x", regex={"col1": "^a."}), pd.DataFrame), + assert_type( + df.replace(pd.Series({"col1": "a"}), pd.Series({"col1": "x"})), pd.DataFrame + ), pd.DataFrame, ) - check(assert_type(df.replace(value="x", regex=pattern), pd.DataFrame), pd.DataFrame) check( - assert_type(df.replace(value="x", regex=[pattern]), pd.DataFrame), pd.DataFrame + assert_type(df.replace(regex={"col1": "a"}, value={"col1": "x"}), pd.DataFrame), + pd.DataFrame, ) check( - assert_type(df.replace(value="x", regex={"col1": pattern}), pd.DataFrame), + assert_type( + df.replace(regex={"col1": pattern}, value={"col1": "x"}), pd.DataFrame + ), pd.DataFrame, ) - check(assert_type(df.replace(regex={"a": "x"}), pd.DataFrame), pd.DataFrame) check( - assert_type(df.replace(regex=pd.Series({"a": "x"})), pd.DataFrame), pd.DataFrame + assert_type( + df.replace(regex=pd.Series({"col1": "a"}), value=pd.Series({"col1": "x"})), + pd.DataFrame, + ), + pd.DataFrame, + ) + # per-column sequence replacement + check( + assert_type(df.replace({"col1": ["a"]}, {"col1": ["x"]}), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(df.replace({"col1": [pattern]}, {"col1": ["x"]}), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type( + df.replace(pd.Series({"col1": ["a"]}), pd.Series({"col1": ["x"]})), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.replace(regex={"col1": ["a"]}, value={"col1": ["x"]}), pd.DataFrame + ), + pd.DataFrame, + ) + check( + assert_type( + df.replace(regex={"col1": [pattern]}, value={"col1": ["x"]}), pd.DataFrame + ), + pd.DataFrame, + ) + check( + assert_type( + df.replace( + regex=pd.Series({"col1": ["a"]}), value=pd.Series({"col1": ["x"]}) + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + # per-column mapping + check(assert_type(df.replace({"col1": {"a": "x"}}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.replace({"col1": {pattern: "x"}}), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.replace({"col1": pd.Series({"a": "x"})}), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(df.replace(regex={"col1": {"a": "x"}}), pd.DataFrame), pd.DataFrame + ) + check( + assert_type(df.replace(regex={"col1": {pattern: "x"}}), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(df.replace(regex={"col1": pd.Series({"a": "x"})}), pd.DataFrame), + pd.DataFrame, ) From 148afe872f08f7aa5c06555dccd73cd3f33c71f9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 24 Feb 2025 16:13:40 +0000 Subject: [PATCH 5/7] use typealias --- pandas-stubs/_typing.pyi | 11 +++++ pandas-stubs/core/frame.pyi | 84 +++--------------------------------- pandas-stubs/core/series.pyi | 52 +++++----------------- tests/test_frame.py | 11 +++-- 4 files changed, 33 insertions(+), 125 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 3e680384e..2791ff104 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -10,6 +10,7 @@ from collections.abc import ( import datetime from datetime import tzinfo from os import PathLike +from re import Pattern import sys from typing import ( Any, @@ -36,6 +37,7 @@ from typing_extensions import ( ) from pandas._libs.interval import Interval +from pandas._libs.missing import NAType from pandas._libs.tslibs import ( BaseOffset, Period, @@ -731,6 +733,15 @@ InterpolateOptions: TypeAlias = Literal[ "cubicspline", "from_derivatives", ] +ReplaceValue: TypeAlias = ( + Scalar + | Pattern + | NAType + | Sequence[Scalar | Pattern] + | Mapping[Scalar | Pattern, Scalar] + | Series[Any] + | None +) SortKind: TypeAlias = Literal["quicksort", "mergesort", "heapsort", "stable"] NaPosition: TypeAlias = Literal["first", "last"] JoinHow: TypeAlias = Literal["left", "right", "outer", "inner"] diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 887a7a25a..344dd3a39 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -8,7 +8,6 @@ from collections.abc import ( Sequence, ) import datetime as dt -from re import Pattern import sys from typing import ( Any, @@ -113,6 +112,7 @@ from pandas._typing import ( RandomState, ReadBuffer, Renamer, + ReplaceValue, Scalar, ScalarT, SequenceNotStr, @@ -798,90 +798,20 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def replace( self, - to_replace: ( - Scalar - | NAType - | Sequence[Scalar | Pattern] - | Mapping[Scalar | Pattern, Scalar] - | Mapping[Hashable, Scalar | Pattern] - | Mapping[Hashable, Sequence[Scalar | Pattern]] - | Mapping[Hashable, Mapping[Scalar | Pattern, Scalar]] - | Mapping[Hashable, Series[Any]] - | Series[Any] - | Pattern - | None - ) = ..., - value: ( - Scalar - | NAType - | Sequence[Scalar] - | Mapping[Scalar, Scalar] - | Mapping[Hashable, Scalar] - | Mapping[Hashable, Sequence[Scalar]] - | Mapping[Hashable, Mapping[Scalar, Scalar]] - | Mapping[Hashable, Series[Any]] - | Series[Any] - | None - ) = ..., + to_replace: ReplaceValue | Mapping[Hashable, ReplaceValue] = ..., + value: ReplaceValue | Mapping[Hashable, ReplaceValue] = ..., *, inplace: Literal[True], - regex: ( - Scalar - | NAType - | Sequence[Scalar | Pattern] - | Mapping[Scalar | Pattern, Scalar] - | Mapping[Hashable, Scalar | Pattern] - | Mapping[Hashable, Sequence[Scalar | Pattern]] - | Mapping[Hashable, Mapping[Scalar | Pattern, Scalar]] - | Mapping[Hashable, Series[Any]] - | Series[Any] - | Pattern - | None - ) = ..., + regex: ReplaceValue | Mapping[Hashable, ReplaceValue] = ..., ) -> None: ... @overload def replace( self, - to_replace: ( - Scalar - | NAType - | Sequence[Scalar | Pattern] - | Mapping[Scalar | Pattern, Scalar] - | Mapping[Hashable, Scalar | Pattern] - | Mapping[Hashable, Sequence[Scalar | Pattern]] - | Mapping[Hashable, Mapping[Scalar | Pattern, Scalar]] - | Mapping[Hashable, Series[Any]] - | Series[Any] - | Pattern - | None - ) = ..., - value: ( - Scalar - | NAType - | Sequence[Scalar] - | Mapping[Scalar, Scalar] - | Mapping[Hashable, Scalar] - | Mapping[Hashable, Sequence[Scalar]] - | Mapping[Hashable, Mapping[Scalar, Scalar]] - | Mapping[Hashable, Series[Any]] - | Series[Any] - | None - ) = ..., + to_replace: ReplaceValue | Mapping[Hashable, ReplaceValue] = ..., + value: ReplaceValue | Mapping[Hashable, ReplaceValue] = ..., *, inplace: Literal[False] = ..., - regex: ( - Scalar - | NAType - | Sequence[Scalar | Pattern] - | Mapping[Scalar | Pattern, Scalar] - | Mapping[Hashable, Scalar | Pattern] - | Mapping[Hashable, Sequence[Scalar | Pattern]] - | Mapping[Hashable, Mapping[Scalar | Pattern, Scalar]] - | Mapping[Hashable, Series[Any]] - | Series[Any] - | Pattern - | None - ) = ..., + regex: ReplaceValue | Mapping[Hashable, ReplaceValue] = ..., ) -> Self: ... def shift( self, diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 4799973cc..2ade583c7 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -15,7 +15,6 @@ from datetime import ( timedelta, ) from pathlib import Path -from re import Pattern from typing import ( Any, ClassVar, @@ -25,7 +24,10 @@ from typing import ( overload, ) -from _typing import TimeZones +from _typing import ( + ReplaceValue, + TimeZones, +) from matplotlib.axes import ( Axes as PlotAxes, SubplotBase, @@ -1089,54 +1091,20 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def replace( self, - to_replace: ( - Scalar - | NAType - | Sequence[Scalar | Pattern] - | Mapping[Scalar | Pattern, Scalar] - | Series[Any] - | Pattern - | None - ) = ..., - value: ( - Scalar | NAType | Sequence[Scalar] | Mapping[Scalar, Scalar] | None - ) = ..., + to_replace: ReplaceValue = ..., + value: ReplaceValue = ..., *, - regex: ( - bool - | str - | Pattern - | Sequence[str | Pattern] - | Mapping[Scalar | Pattern, Scalar] - | Series[Any] - ) = ..., + regex: ReplaceValue = ..., inplace: Literal[True], ) -> None: ... @overload def replace( self, - to_replace: ( - Scalar - | NAType - | Sequence[Scalar | Pattern] - | Mapping[Scalar | Pattern, Scalar] - | Series[Any] - | Pattern - | None - ) = ..., - value: ( - Scalar | NAType | Sequence[Scalar] | Mapping[Scalar, Scalar] | None - ) = ..., + to_replace: ReplaceValue = ..., + value: ReplaceValue = ..., *, + regex: ReplaceValue = ..., inplace: Literal[False] = ..., - regex: ( - bool - | str - | Pattern - | Sequence[str | Pattern] - | Mapping[Scalar | Pattern, Scalar] - | Series[Any] - ) = ..., ) -> Series[S1]: ... def shift( self, diff --git a/tests/test_frame.py b/tests/test_frame.py index 530b57f4a..7696008e3 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -2574,14 +2574,13 @@ def test_types_replace() -> None: def test_dataframe_replace() -> None: df = pd.DataFrame({"col1": ["a", "ab", "ba"]}) pattern = re.compile(r"^a.*") - # global scalar replacement check(assert_type(df.replace("a", "x"), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace(pattern, "x"), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace("a", "x", regex=True), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace(pattern, "x"), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace(regex="a", value="x"), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace(regex=pattern, value="x"), pd.DataFrame), pd.DataFrame) - # global sequence replacement + check(assert_type(df.replace(["a"], ["x"]), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace([pattern], ["x"]), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace(regex=["a"], value=["x"]), pd.DataFrame), pd.DataFrame) @@ -2589,7 +2588,7 @@ def test_dataframe_replace() -> None: assert_type(df.replace(regex=[pattern], value=["x"]), pd.DataFrame), pd.DataFrame, ) - # global mapping + check(assert_type(df.replace({"a": "x"}), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace({pattern: "x"}), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace(pd.Series({"a": "x"})), pd.DataFrame), pd.DataFrame) @@ -2598,7 +2597,7 @@ def test_dataframe_replace() -> None: check( assert_type(df.replace(regex=pd.Series({"a": "x"})), pd.DataFrame), pd.DataFrame ) - # per-column scalar-scalar replacement + check( assert_type(df.replace({"col1": "a"}, {"col1": "x"}), pd.DataFrame), pd.DataFrame, @@ -2630,7 +2629,7 @@ def test_dataframe_replace() -> None: ), pd.DataFrame, ) - # per-column sequence replacement + check( assert_type(df.replace({"col1": ["a"]}, {"col1": ["x"]}), pd.DataFrame), pd.DataFrame, @@ -2667,7 +2666,7 @@ def test_dataframe_replace() -> None: ), pd.DataFrame, ) - # per-column mapping + check(assert_type(df.replace({"col1": {"a": "x"}}), pd.DataFrame), pd.DataFrame) check(assert_type(df.replace({"col1": {pattern: "x"}}), pd.DataFrame), pd.DataFrame) check( From be6ff2272e075852d21a12165f7d9f1eb4f4b1b7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 24 Feb 2025 17:59:29 +0000 Subject: [PATCH 6/7] mypy fixup --- pandas-stubs/_typing.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 2791ff104..c6c118b18 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -738,7 +738,7 @@ ReplaceValue: TypeAlias = ( | Pattern | NAType | Sequence[Scalar | Pattern] - | Mapping[Scalar | Pattern, Scalar] + | Mapping[Hashable, Scalar] | Series[Any] | None ) From 3b772090367cd498f7420c7cf683ad1518b54094 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 24 Feb 2025 18:01:11 +0000 Subject: [PATCH 7/7] comment --- pandas-stubs/_typing.pyi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index c6c118b18..478f60da0 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -733,6 +733,8 @@ InterpolateOptions: TypeAlias = Literal[ "cubicspline", "from_derivatives", ] +# Can be passed to `to_replace`, `value`, or `regex` in `Series.replace`. +# `DataFrame.replace` also accepts mappings of these. ReplaceValue: TypeAlias = ( Scalar | Pattern