From 5305cd039ba77340191ecb8ecc3060e3d8eb1b7c Mon Sep 17 00:00:00 2001 From: Daniel Roseman Date: Tue, 25 Oct 2022 21:16:55 +0100 Subject: [PATCH 1/4] Series.value_counts returns Series[int]. --- pandas-stubs/core/series.pyi | 2 +- tests/test_series.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 189054673..cbe54fef2 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -1193,7 +1193,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): ascending: _bool = ..., bins: int | None = ..., dropna: _bool = ..., - ) -> Series[S1]: ... + ) -> Series[int]: ... def transpose(self, *args, **kwargs) -> Series[S1]: ... @property def T(self) -> Series[S1]: ... diff --git a/tests/test_series.py b/tests/test_series.py index 67f768d81..8e1b3c8cf 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -367,8 +367,8 @@ def test_types_idxmax() -> None: def test_types_value_counts() -> None: - s = pd.Series([1, 2]) - s.value_counts() + s = pd.Series(["a", "b"]) + check(assert_type(s.value_counts(), "pd.Series[int]"), pd.Series, int) def test_types_unique() -> None: From 35df38fde1bd201bd5f9c07da8d288868009e672 Mon Sep 17 00:00:00 2001 From: Daniel Roseman Date: Tue, 25 Oct 2022 21:38:18 +0100 Subject: [PATCH 2/4] Series.apply callable result might not be hashable It's possible for the callable in Series.apply to return something non-hashable like a list, but the result of apply should still be a Series. --- pandas-stubs/core/series.pyi | 2 +- tests/test_series.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index cbe54fef2..6697f7924 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -685,7 +685,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): @overload def apply( self, - func: Callable[..., Hashable], + func: Callable[..., Scalar | Sequence | Mapping], convertDType: _bool = ..., args: tuple = ..., **kwds, diff --git a/tests/test_series.py b/tests/test_series.py index 8e1b3c8cf..e6136f503 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -398,6 +398,11 @@ def retseries(x: float) -> float: check(assert_type(s.apply(retseries).tolist(), list), list) + def retlist(x: float) -> list: + return [x] + + check(assert_type(s.apply(retlist), pd.Series), pd.Series, list) + def get_depth(url: str) -> int: return len(url) From e8d6a77282650a8274298d0a6195146f2baae5aa Mon Sep 17 00:00:00 2001 From: Daniel Roseman Date: Wed, 26 Oct 2022 09:55:48 +0100 Subject: [PATCH 3/4] More detailed typing for DataFrame.apply. Whether it returns a Series or a DataFrame depends on the return type of the callable. In the case of the callable returning a scalar, the result is a Series unless the result_type is "broadcast". --- pandas-stubs/core/frame.pyi | 26 ++++++++++++++++++++++---- tests/test_frame.py | 19 ++++++++++++++++--- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index fcd6b996a..a65e6e1d9 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1088,14 +1088,32 @@ class DataFrame(NDFrame, OpsMixin): **kwargs, ) -> DataFrame: ... @overload - def apply(self, f: Callable) -> Series: ... + def apply( + self, + f: Callable[..., Series], + axis: AxisType = ..., + raw: _bool = ..., + result_type: Literal["expand", "reduce", "broadcast"] | None = ..., + args=..., + **kwargs, + ) -> DataFrame: ... + @overload + def apply( + self, + f: Callable[..., Scalar], + axis: AxisType = ..., + raw: _bool = ..., + result_type: Literal["expand", "reduce"] | None = ..., + args=..., + **kwargs, + ) -> Series: ... @overload def apply( self, - f: Callable, - axis: AxisType, + f: Callable[..., Scalar], + result_type: Literal["broadcast"], + axis: AxisType = ..., raw: _bool = ..., - result_type: _str | None = ..., args=..., **kwargs, ) -> DataFrame: ... diff --git a/tests/test_frame.py b/tests/test_frame.py index 3f0a4ad52..42fe3f680 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -460,9 +460,22 @@ def test_types_unique() -> None: def test_types_apply() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.apply(lambda x: x**2) - df.apply(np.exp) - df.apply(str) + + def returns_series(x: pd.Series) -> pd.Series: + return x**2 + + check(assert_type(df.apply(returns_series), pd.DataFrame), pd.DataFrame) + + def returns_scalar(x: pd.Series) -> float: + return 2 + + check(assert_type(df.apply(returns_scalar), pd.Series), pd.Series) + check( + assert_type(df.apply(returns_scalar, result_type="broadcast"), pd.DataFrame), + pd.DataFrame, + ) + check(assert_type(df.apply(np.exp), pd.DataFrame), pd.DataFrame) + check(assert_type(df.apply(str), pd.Series), pd.Series) def test_types_applymap() -> None: From b1d4b2583b961358b80ce6e9434987d7f889a16d Mon Sep 17 00:00:00 2001 From: Daniel Roseman Date: Thu, 27 Oct 2022 10:15:02 +0100 Subject: [PATCH 4/4] Add test for #393. --- tests/test_frame.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_frame.py b/tests/test_frame.py index 42fe3f680..977dd7f2c 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -477,6 +477,12 @@ def returns_scalar(x: pd.Series) -> float: check(assert_type(df.apply(np.exp), pd.DataFrame), pd.DataFrame) check(assert_type(df.apply(str), pd.Series), pd.Series) + # GH 393 + def gethead(s: pd.Series, y: int) -> pd.Series: + return s.head(y) + + check(assert_type(df.apply(gethead, args=(4,)), pd.DataFrame), pd.DataFrame) + def test_types_applymap() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})