From d39115dbf207a06aa66482eb15d4fe76fcc74611 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 27 Aug 2022 00:17:07 +0200 Subject: [PATCH 1/2] Add AnyArrayLike for merge on arguments --- pandas-stubs/core/frame.pyi | 7 ++++--- pandas-stubs/core/reshape/merge.pyi | 21 ++++++++++++--------- tests/test_merge.py | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 0766aa586..f80b65bd6 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -44,6 +44,7 @@ from pandas._typing import ( AggFuncType, AggFuncTypeBase, AggFuncTypeDict, + AnyArrayLike, ArrayLike, Axes, Axis, @@ -1049,9 +1050,9 @@ class DataFrame(NDFrame, OpsMixin): self, right: DataFrame | Series, how: MergeHow = ..., - on: IndexLabel | None = ..., - left_on: IndexLabel | None = ..., - right_on: IndexLabel | None = ..., + on: IndexLabel | AnyArrayLike | None = ..., + left_on: IndexLabel | AnyArrayLike | None = ..., + right_on: IndexLabel | AnyArrayLike | None = ..., left_index: _bool = ..., right_index: _bool = ..., sort: _bool = ..., diff --git a/pandas-stubs/core/reshape/merge.pyi b/pandas-stubs/core/reshape/merge.pyi index b9f441670..8233f51c2 100644 --- a/pandas-stubs/core/reshape/merge.pyi +++ b/pandas-stubs/core/reshape/merge.pyi @@ -6,15 +6,18 @@ from pandas import ( ) from pandas._libs.tslibs import Timedelta -from pandas._typing import Label +from pandas._typing import ( + AnyArrayLike, + Label, +) def merge( left: DataFrame | Series, right: DataFrame | Series, how: str = ..., - on: Label | Sequence | None = ..., - left_on: Label | Sequence | None = ..., - right_on: Label | Sequence | None = ..., + on: Label | Sequence | AnyArrayLike | None = ..., + left_on: Label | Sequence | AnyArrayLike | None = ..., + right_on: Label | Sequence | AnyArrayLike | None = ..., left_index: bool = ..., right_index: bool = ..., sort: bool = ..., @@ -26,9 +29,9 @@ def merge( def merge_ordered( left: DataFrame | Series, right: DataFrame | Series, - on: Label | Sequence | None = ..., - left_on: Label | Sequence | None = ..., - right_on: Label | Sequence | None = ..., + on: Label | Sequence | AnyArrayLike | None = ..., + left_on: Label | Sequence | AnyArrayLike | None = ..., + right_on: Label | Sequence | AnyArrayLike | None = ..., left_by: str | list[str] | None = ..., right_by: str | list[str] | None = ..., fill_method: str | None = ..., @@ -39,8 +42,8 @@ def merge_asof( left: DataFrame | Series, right: DataFrame | Series, on: Label | None = ..., - left_on: Label | None = ..., - right_on: Label | None = ..., + left_on: Label | AnyArrayLike | None = ..., + right_on: Label | AnyArrayLike | None = ..., left_index: bool = ..., right_index: bool = ..., by: str | list[str] | None = ..., diff --git a/tests/test_merge.py b/tests/test_merge.py index 9621ccb30..1efabaca0 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd @@ -8,3 +9,27 @@ def test_types_merge() -> None: df2 = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [0, 1, 0]}) columns = ["col1", "col2"] df.merge(df2, on=columns) + + df.merge(df2, on=pd.Series([1, 2, 3])) + df.merge(df2, on=pd.Index([1, 2, 3])) + df.merge(df2, on=np.array([1, 2, 3])) + + df.merge(df2, left_on=pd.Series([1, 2, 3]), right_on=pd.Series([1, 2, 3])) + df.merge(df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Series([1, 2, 3])) + df.merge(df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Index([1, 2, 3])) + + df.merge(df2, left_on=np.array([1, 2, 3]), right_on=pd.Series([1, 2, 3])) + df.merge(df2, left_on=np.array([1, 2, 3]), right_on=pd.Index([1, 2, 3])) + df.merge(df2, left_on=np.array([1, 2, 3]), right_on=np.array([1, 2, 3])) + + pd.merge(df, df2, on=pd.Series([1, 2, 3])) + pd.merge(df, df2, on=pd.Index([1, 2, 3])) + pd.merge(df, df2, on=np.array([1, 2, 3])) + + pd.merge(df, df2, left_on=pd.Series([1, 2, 3]), right_on=pd.Series([1, 2, 3])) + pd.merge(df, df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Series([1, 2, 3])) + pd.merge(df, df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Index([1, 2, 3])) + + pd.merge(df, df2, left_on=np.array([1, 2, 3]), right_on=pd.Series([1, 2, 3])) + pd.merge(df, df2, left_on=np.array([1, 2, 3]), right_on=pd.Index([1, 2, 3])) + pd.merge(df, df2, left_on=np.array([1, 2, 3]), right_on=np.array([1, 2, 3])) From 228f881dd5477666ae9b8807efc8cc2ad543d7e9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 27 Aug 2022 22:13:07 +0200 Subject: [PATCH 2/2] Add checks --- tests/test_merge.py | 138 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 120 insertions(+), 18 deletions(-) diff --git a/tests/test_merge.py b/tests/test_merge.py index 1efabaca0..92ce63acb 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -2,6 +2,9 @@ import numpy as np import pandas as pd +from typing_extensions import assert_type + +from tests import check def test_types_merge() -> None: @@ -10,26 +13,125 @@ def test_types_merge() -> None: columns = ["col1", "col2"] df.merge(df2, on=columns) - df.merge(df2, on=pd.Series([1, 2, 3])) - df.merge(df2, on=pd.Index([1, 2, 3])) - df.merge(df2, on=np.array([1, 2, 3])) + check( + assert_type(df.merge(df2, on=pd.Series([1, 2, 3])), pd.DataFrame), pd.DataFrame + ) + check( + assert_type(df.merge(df2, on=pd.Index([1, 2, 3])), pd.DataFrame), pd.DataFrame + ) + check( + assert_type(df.merge(df2, on=np.array([1, 2, 3])), pd.DataFrame), pd.DataFrame + ) - df.merge(df2, left_on=pd.Series([1, 2, 3]), right_on=pd.Series([1, 2, 3])) - df.merge(df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Series([1, 2, 3])) - df.merge(df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Index([1, 2, 3])) + check( + assert_type( + df.merge(df2, left_on=pd.Series([1, 2, 3]), right_on=pd.Series([1, 2, 3])), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Series([1, 2, 3])), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Index([1, 2, 3])), + pd.DataFrame, + ), + pd.DataFrame, + ) - df.merge(df2, left_on=np.array([1, 2, 3]), right_on=pd.Series([1, 2, 3])) - df.merge(df2, left_on=np.array([1, 2, 3]), right_on=pd.Index([1, 2, 3])) - df.merge(df2, left_on=np.array([1, 2, 3]), right_on=np.array([1, 2, 3])) + check( + assert_type( + df.merge(df2, left_on=np.array([1, 2, 3]), right_on=pd.Series([1, 2, 3])), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, left_on=np.array([1, 2, 3]), right_on=pd.Index([1, 2, 3])), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, left_on=np.array([1, 2, 3]), right_on=np.array([1, 2, 3])), + pd.DataFrame, + ), + pd.DataFrame, + ) - pd.merge(df, df2, on=pd.Series([1, 2, 3])) - pd.merge(df, df2, on=pd.Index([1, 2, 3])) - pd.merge(df, df2, on=np.array([1, 2, 3])) + check( + assert_type(pd.merge(df, df2, on=pd.Series([1, 2, 3])), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(pd.merge(df, df2, on=pd.Index([1, 2, 3])), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(pd.merge(df, df2, on=np.array([1, 2, 3])), pd.DataFrame), + pd.DataFrame, + ) - pd.merge(df, df2, left_on=pd.Series([1, 2, 3]), right_on=pd.Series([1, 2, 3])) - pd.merge(df, df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Series([1, 2, 3])) - pd.merge(df, df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Index([1, 2, 3])) + check( + assert_type( + pd.merge( + df, df2, left_on=pd.Series([1, 2, 3]), right_on=pd.Series([1, 2, 3]) + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + pd.merge( + df, df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Series([1, 2, 3]) + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + pd.merge( + df, df2, left_on=pd.Index([1, 2, 3]), right_on=pd.Index([1, 2, 3]) + ), + pd.DataFrame, + ), + pd.DataFrame, + ) - pd.merge(df, df2, left_on=np.array([1, 2, 3]), right_on=pd.Series([1, 2, 3])) - pd.merge(df, df2, left_on=np.array([1, 2, 3]), right_on=pd.Index([1, 2, 3])) - pd.merge(df, df2, left_on=np.array([1, 2, 3]), right_on=np.array([1, 2, 3])) + check( + assert_type( + pd.merge( + df, df2, left_on=np.array([1, 2, 3]), right_on=pd.Series([1, 2, 3]) + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + pd.merge( + df, df2, left_on=np.array([1, 2, 3]), right_on=pd.Index([1, 2, 3]) + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + pd.merge( + df, df2, left_on=np.array([1, 2, 3]), right_on=np.array([1, 2, 3]) + ), + pd.DataFrame, + ), + pd.DataFrame, + )