From 77060c143c27a87f00d3638eabb97ec4428be143 Mon Sep 17 00:00:00 2001 From: Soshi Katsuta Date: Wed, 14 Dec 2022 21:47:26 +0900 Subject: [PATCH 1/7] ENH: add dict to return type of func argument of DataFrame#apply when result_type="expand" --- pandas-stubs/core/frame.pyi | 2 +- tests/test_frame.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 10a37c787..28044139f 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1126,7 +1126,7 @@ class DataFrame(NDFrame, OpsMixin): @overload def apply( self, - f: Callable[..., ListLikeExceptSeriesAndStr | Series], + f: Callable[..., ListLikeExceptSeriesAndStr | Series | dict], axis: AxisType = ..., raw: _bool = ..., args=..., diff --git a/tests/test_frame.py b/tests/test_frame.py index 5d67ef728..4c77a6a7f 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -477,6 +477,9 @@ def returns_listlike_of_2(x: pd.Series) -> tuple[int, int]: def returns_listlike_of_3(x: pd.Series) -> tuple[int, int, int]: return (7, 8, 9) + def returns_dict(x: pd.Series) -> dict[str, int]: + return {"col4": 7, "col5": 8} + # Misc checks check(assert_type(df.apply(np.exp), pd.DataFrame), pd.DataFrame) check(assert_type(df.apply(str), "pd.Series[str]"), pd.Series, str) @@ -518,6 +521,10 @@ def gethead(s: pd.Series, y: int) -> pd.Series: ), pd.DataFrame, ) + check( + assert_type(df.apply(returns_dict, result_type="expand"), pd.DataFrame), + pd.DataFrame, + ) # Check various return types for result_type="reduce" with default axis (0) check( @@ -599,6 +606,10 @@ def gethead(s: pd.Series, y: int) -> pd.Series: ), pd.DataFrame, ) + check( + assert_type(df.apply(returns_dict, axis=1, result_type="expand"), pd.DataFrame), + pd.DataFrame, + ) # Check various return types for result_type="reduce" with axis=1 check( From eec0284646d546570d97bd0921a96cf90d94c02b Mon Sep 17 00:00:00 2001 From: Soshi Katsuta Date: Fri, 16 Dec 2022 
22:51:30 +0900 Subject: [PATCH 2/7] pandas-stubs/core/frame: replace Literal[None] with just None --- pandas-stubs/core/frame.pyi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 28044139f..f3c4ffea3 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1096,7 +1096,7 @@ class DataFrame(NDFrame, OpsMixin): f: Callable[..., ListLikeExceptSeriesAndStr | Series], axis: AxisTypeIndex = ..., raw: _bool = ..., - result_type: Literal[None] = ..., + result_type: None = ..., args=..., **kwargs, ) -> DataFrame: ... @@ -1106,7 +1106,7 @@ class DataFrame(NDFrame, OpsMixin): f: Callable[..., S1], axis: AxisTypeIndex = ..., raw: _bool = ..., - result_type: Literal[None] = ..., + result_type: None = ..., args=..., **kwargs, ) -> Series[S1]: ... @@ -1176,7 +1176,7 @@ class DataFrame(NDFrame, OpsMixin): self, f: Callable[..., S1], raw: _bool = ..., - result_type: Literal[None] = ..., + result_type: None = ..., args=..., *, axis: AxisTypeColumn, @@ -1187,7 +1187,7 @@ class DataFrame(NDFrame, OpsMixin): self, f: Callable[..., ListLikeExceptSeriesAndStr], raw: _bool = ..., - result_type: Literal[None] = ..., + result_type: None = ..., args=..., *, axis: AxisTypeColumn, @@ -1198,7 +1198,7 @@ class DataFrame(NDFrame, OpsMixin): self, f: Callable[..., Series], raw: _bool = ..., - result_type: Literal[None] = ..., + result_type: None = ..., args=..., *, axis: AxisTypeColumn, From 8187af611e6fcf29451d57d511b0731ad57162c5 Mon Sep 17 00:00:00 2001 From: Soshi Katsuta Date: Sat, 17 Dec 2022 00:10:10 +0900 Subject: [PATCH 3/7] Add support for apply func that returns Mapping with axis=0 and result_type=None --- pandas-stubs/core/frame.pyi | 12 ++++++++++++ tests/test_frame.py | 11 +++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index f3c4ffea3..ac2018efb 100644 --- a/pandas-stubs/core/frame.pyi +++ 
b/pandas-stubs/core/frame.pyi @@ -1110,6 +1110,18 @@ class DataFrame(NDFrame, OpsMixin): args=..., **kwargs, ) -> Series[S1]: ... + # Since non-scalar type T is not supported in Series[T], + # we separate this overload from the above one + @overload + def apply( + self, + f: Callable[..., Mapping], + axis: AxisTypeIndex = ..., + raw: _bool = ..., + result_type: None = ..., + args=..., + **kwargs, + ) -> Series: ... # apply() overloads with keyword result_type, and axis does not matter @overload diff --git a/tests/test_frame.py b/tests/test_frame.py index 4c77a6a7f..943e93654 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -492,14 +492,9 @@ def gethead(s: pd.Series, y: int) -> pd.Series: # Check various return types for default result_type (None) with default axis (0) check(assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, int) - check( - assert_type(df.apply(returns_series), pd.DataFrame), - pd.DataFrame, - ) - check( - assert_type(df.apply(returns_listlike_of_3), pd.DataFrame), - pd.DataFrame, - ) + check(assert_type(df.apply(returns_series), pd.DataFrame), pd.DataFrame) + check(assert_type(df.apply(returns_listlike_of_3), pd.DataFrame), pd.DataFrame) + check(assert_type(df.apply(returns_dict), pd.Series), pd.Series) # Check various return types for result_type="expand" with default axis (0) check( From 05940be444fff8fd4733c22e4980c781ee8fae4d Mon Sep 17 00:00:00 2001 From: Soshi Katsuta Date: Sat, 17 Dec 2022 00:25:02 +0900 Subject: [PATCH 4/7] Replace dict with Mapping in return type of f for DataFrame#apply --- pandas-stubs/core/frame.pyi | 2 +- tests/test_frame.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index ac2018efb..094a62aa0 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1138,7 +1138,7 @@ class DataFrame(NDFrame, OpsMixin): @overload def apply( self, - f: Callable[..., ListLikeExceptSeriesAndStr | 
Series | dict], + f: Callable[..., ListLikeExceptSeriesAndStr | Series | Mapping], axis: AxisType = ..., raw: _bool = ..., args=..., diff --git a/tests/test_frame.py b/tests/test_frame.py index 943e93654..a10208117 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -498,11 +498,9 @@ def gethead(s: pd.Series, y: int) -> pd.Series: # Check various return types for result_type="expand" with default axis (0) check( - assert_type( - # Note that technically it does not make sense to pass a result_type of "expand" to a scalar return - df.apply(returns_scalar, result_type="expand"), - "pd.Series[int]", - ), + # Note that technically it does not make sense + # to pass a result_type of "expand" to a scalar return + assert_type(df.apply(returns_scalar, result_type="expand"), "pd.Series[int]"), pd.Series, int, ) From b101d7746bc5f474948d5764978f49c5f9fbc932 Mon Sep 17 00:00:00 2001 From: Soshi Katsuta Date: Sat, 17 Dec 2022 00:37:19 +0900 Subject: [PATCH 5/7] Add support for apply func that returns Mapping with result_type="reduce" --- pandas-stubs/core/frame.pyi | 2 +- tests/test_frame.py | 41 ++++++++++++++++++++----------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 094a62aa0..9efe78ae1 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1149,7 +1149,7 @@ class DataFrame(NDFrame, OpsMixin): @overload def apply( self, - f: Callable[..., ListLikeExceptSeriesAndStr], + f: Callable[..., ListLikeExceptSeriesAndStr | Mapping], axis: AxisType = ..., raw: _bool = ..., args=..., diff --git a/tests/test_frame.py b/tests/test_frame.py index a10208117..23393fa19 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -521,26 +521,25 @@ def gethead(s: pd.Series, y: int) -> pd.Series: # Check various return types for result_type="reduce" with default axis (0) check( - assert_type( - # Note that technically it does not make sense to pass a result_type of 
"reduce" to a scalar return - df.apply(returns_scalar, result_type="reduce"), - "pd.Series[int]", - ), + # Note that technically it does not make sense + # to pass a result_type of "reduce" to a scalar return + assert_type(df.apply(returns_scalar, result_type="reduce"), "pd.Series[int]"), pd.Series, int, ) check( - assert_type( - # Note that technically it does not make sense to pass a result_type of "reduce" to a series return - df.apply(returns_series, result_type="reduce"), - pd.Series, - ), + # Note that technically it does not make sense + # to pass a result_type of "reduce" to a series return + assert_type(df.apply(returns_series, result_type="reduce"), pd.Series), pd.Series, # This technically returns a pd.Series[pd.Series], but typing does not support that ) check( assert_type(df.apply(returns_listlike_of_3, result_type="reduce"), pd.Series), pd.Series, ) + check( + assert_type(df.apply(returns_dict, result_type="reduce"), pd.Series), pd.Series + ) # Check various return types for result_type="broadcast" with default axis (0) check( @@ -579,10 +578,10 @@ def gethead(s: pd.Series, y: int) -> pd.Series: # Check various return types for result_type="expand" with axis=1 check( + # Note that technically it does not make sense + # to pass a result_type of "expand" to a scalar return assert_type( - # Note that technically it does not make sense to pass a result_type of "expand" to a scalar return - df.apply(returns_scalar, axis=1, result_type="expand"), - "pd.Series[int]", + df.apply(returns_scalar, axis=1, result_type="expand"), "pd.Series[int]" ), pd.Series, int, @@ -606,19 +605,19 @@ def gethead(s: pd.Series, y: int) -> pd.Series: # Check various return types for result_type="reduce" with axis=1 check( + # Note that technically it does not make sense + # to pass a result_type of "reduce" to a scalar return assert_type( - # Note that technically it does not make sense to pass a result_type of "reduce" to a scalar return - df.apply(returns_scalar, axis=1, 
result_type="reduce"), - "pd.Series[int]", + df.apply(returns_scalar, axis=1, result_type="reduce"), "pd.Series[int]" ), pd.Series, int, ) check( + # Note that technically it does not make sense + # to pass a result_type of "reduce" to a series return assert_type( - # Note that technically it does not make sense to pass a result_type of "reduce" to a series return - df.apply(returns_series, axis=1, result_type="reduce"), - pd.DataFrame, + df.apply(returns_series, axis=1, result_type="reduce"), pd.DataFrame ), pd.DataFrame, ) @@ -628,6 +627,10 @@ def gethead(s: pd.Series, y: int) -> pd.Series: ), pd.Series, ) + check( + assert_type(df.apply(returns_dict, axis=1, result_type="reduce"), pd.Series), + pd.Series, + ) # Check various return types for result_type="broadcast" with axis=1 check( From e6160fcb4f37951080517f6174ba0efdd0786340 Mon Sep 17 00:00:00 2001 From: Soshi Katsuta Date: Sat, 17 Dec 2022 01:30:52 +0900 Subject: [PATCH 6/7] Add support for apply func that returns Mapping with axis=1 and result_type=None --- pandas-stubs/core/frame.pyi | 2 +- tests/test_frame.py | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 9efe78ae1..9403ebcfa 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1197,7 +1197,7 @@ class DataFrame(NDFrame, OpsMixin): @overload def apply( self, - f: Callable[..., ListLikeExceptSeriesAndStr], + f: Callable[..., ListLikeExceptSeriesAndStr | Mapping], raw: _bool = ..., result_type: None = ..., args=..., diff --git a/tests/test_frame.py b/tests/test_frame.py index 23393fa19..8f763049a 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -567,14 +567,9 @@ def gethead(s: pd.Series, y: int) -> pd.Series: check( assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"), pd.Series, int ) - check( - assert_type(df.apply(returns_series, axis=1), pd.DataFrame), - pd.DataFrame, - ) - check( - 
assert_type(df.apply(returns_listlike_of_3, axis=1), pd.Series), - pd.Series, - ) + check(assert_type(df.apply(returns_series, axis=1), pd.DataFrame), pd.DataFrame) + check(assert_type(df.apply(returns_listlike_of_3, axis=1), pd.Series), pd.Series) + check(assert_type(df.apply(returns_dict, axis=1), pd.Series), pd.Series) # Check various return types for result_type="expand" with axis=1 check( From 370a0128068cc8e917f914823a00bd3dc615d16a Mon Sep 17 00:00:00 2001 From: Soshi Katsuta Date: Sat, 17 Dec 2022 01:17:47 +0900 Subject: [PATCH 7/7] Add support for apply func that returns Mapping with result_type="broadcast" --- pandas-stubs/core/frame.pyi | 2 +- tests/test_frame.py | 66 +++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 9403ebcfa..40dee71f1 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1160,7 +1160,7 @@ class DataFrame(NDFrame, OpsMixin): @overload def apply( self, - f: Callable[..., ListLikeExceptSeriesAndStr | Series | Scalar], + f: Callable[..., ListLikeExceptSeriesAndStr | Series | Scalar | Mapping], axis: AxisType = ..., raw: _bool = ..., args=..., diff --git a/tests/test_frame.py b/tests/test_frame.py index 8f763049a..e8de03e29 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -541,28 +541,6 @@ def gethead(s: pd.Series, y: int) -> pd.Series: assert_type(df.apply(returns_dict, result_type="reduce"), pd.Series), pd.Series ) - # Check various return types for result_type="broadcast" with default axis (0) - check( - assert_type( - # Note that technically it does not make sense to pass a result_type of "broadcast" to a scalar return - df.apply(returns_scalar, result_type="broadcast"), - pd.DataFrame, - ), - pd.DataFrame, - ) - check( - assert_type(df.apply(returns_series, result_type="broadcast"), pd.DataFrame), - pd.DataFrame, - ) - check( - assert_type( - # Can only broadcast a list-like of 2 
elements, not 3, because there are 2 rows - df.apply(returns_listlike_of_2, result_type="broadcast"), - pd.DataFrame, - ), - pd.DataFrame, - ) - # Check various return types for default result_type (None) with axis=1 check( assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"), pd.Series, int @@ -627,12 +605,29 @@ def gethead(s: pd.Series, y: int) -> pd.Series: pd.Series, ) - # Check various return types for result_type="broadcast" with axis=1 + # Check various return types for result_type="broadcast" with axis=0 and axis=1 check( + # Note that technically it does not make sense + # to pass a result_type of "broadcast" to a scalar return + assert_type(df.apply(returns_scalar, result_type="broadcast"), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(df.apply(returns_series, result_type="broadcast"), pd.DataFrame), + pd.DataFrame, + ) + check( + # Can only broadcast a list-like of 2 elements, not 3, because there are 2 rows assert_type( - # Note that technicaly it does not make sense to pass a result_type of "broadcast" to a scalar return - df.apply(returns_scalar, axis=1, result_type="broadcast"), - pd.DataFrame, + df.apply(returns_listlike_of_2, result_type="broadcast"), pd.DataFrame + ), + pd.DataFrame, + ) + check( + # Note that technically it does not make sense + # to pass a result_type of "broadcast" to a scalar return + assert_type( + df.apply(returns_scalar, axis=1, result_type="broadcast"), pd.DataFrame ), pd.DataFrame, ) @@ -644,14 +639,29 @@ def gethead(s: pd.Series, y: int) -> pd.Series: check( assert_type( - # Can only broadcast a list-like of 3 elements, not 2, as there are 3 columns + # Can only broadcast a list-like of 3 elements, not 2, + # as there are 3 columns df.apply(returns_listlike_of_3, axis=1, result_type="broadcast"), pd.DataFrame, ), pd.DataFrame, ) + # Since dicts will be assigned to elements of np.ndarray inside broadcasting, + # we need to use a DataFrame with object dtype to make the assignment possible. 
+ df2 = pd.DataFrame({"col1": ["a", "b"], "col2": ["c", "d"]}) + check( + assert_type(df2.apply(returns_dict, result_type="broadcast"), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type( + df2.apply(returns_dict, axis=1, result_type="broadcast"), pd.DataFrame + ), + pd.DataFrame, + ) - # Test various other positional/keyword argument combinations to ensure all overloads are supported + # Test various other positional/keyword argument combinations + # to ensure all overloads are supported check( assert_type(df.apply(returns_scalar, axis=0), "pd.Series[int]"), pd.Series, int )