From 22c54af863c37370ed9a6b749297d8ffb2d34b3a Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Sat, 23 Apr 2022 20:32:25 +1000 Subject: [PATCH 01/15] Update join docs for other param Update join docs regarding using multiple Series --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 74d061cbb9b7f..f0a1f4763aa92 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9379,7 +9379,7 @@ def _append( def join( self, - other: DataFrame | Series, + other: DataFrame | Series | list[DataFrame | Series], on: IndexLabel | None = None, how: str = "left", lsuffix: str = "", @@ -9395,7 +9395,7 @@ def join( Parameters ---------- - other : DataFrame, Series, or list of DataFrame + other : DataFrame, Series, or list of either of these Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. From 36eec5bd5188d03168aefcfbedb2a8f394c71991 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Sat, 23 Apr 2022 22:16:12 +1000 Subject: [PATCH 02/15] Update type for _join_compat --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f0a1f4763aa92..c5fbef75eadb4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9537,7 +9537,7 @@ def join( def _join_compat( self, - other: DataFrame | Series, + other: DataFrame | Series | list[DataFrame | Series], on: IndexLabel | None = None, how: str = "left", lsuffix: str = "", From 48048dbb13cc5c85d1972e5d99098f2375f924d1 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Sat, 28 May 2022 01:42:27 +1000 Subject: [PATCH 03/15] Allow any iterable for join; test join for a list of series --- pandas/core/frame.py | 4 ++-- pandas/tests/frame/methods/test_join.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c5fbef75eadb4..2d61ca874804e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9379,7 +9379,7 @@ def _append( def join( self, - other: DataFrame | Series | list[DataFrame | Series], + other: DataFrame | Series | Iterable[DataFrame | Series], on: IndexLabel | None = None, how: str = "left", lsuffix: str = "", @@ -9537,7 +9537,7 @@ def join( def _join_compat( self, - other: DataFrame | Series | list[DataFrame | Series], + other: DataFrame | Series | Iterable[DataFrame | Series], on: IndexLabel | None = None, how: str = "left", lsuffix: str = "", diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 597e95c8a2289..ab9ee57441ecd 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -225,6 +225,13 @@ def test_join_left_sequence_non_unique_index(): tm.assert_frame_equal(joined, expected) +def test_join_list_series(float_frame): + left = float_frame.A.to_frame() + right = [float_frame.B, float_frame[["C", "D"]]] + result = left.join(right) + assert result.equals(float_frame) + + @pytest.mark.parametrize("sort_kw", [True, False]) def test_suppress_future_warning_with_sort_kw(sort_kw): a = DataFrame({"col1": [1, 2]}, index=["c", "a"]) From 781130666b30860ea557ba07e429275aedb862d9 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Sat, 28 May 2022 03:23:54 +1000 Subject: [PATCH 04/15] Update type signature --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2d61ca874804e..f372d60e6ae21 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9583,7 +9583,7 @@ def _join_compat( "Suffixes not supported when joining multiple DataFrames" ) - frames = [self] + list(other) + frames: list[Series | DataFrame] = [self] + list(other) can_concat = all(df.index.is_unique for df in frames) From bb002f8e14c41190b256c755b6b2b9ab9c7ecaab Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Sat, 28 May 2022 14:47:38 +1000 Subject: [PATCH 05/15] Update pd.concat type, add cast() to make frame.join() work with mypy --- pandas/core/frame.py | 2 +- pandas/core/reshape/concat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f372d60e6ae21..b9d41a0074d2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9583,7 +9583,7 @@ def _join_compat( "Suffixes not supported when joining multiple DataFrames" ) - frames: list[Series | DataFrame] = [self] + list(other) + frames = [self] + list(cast(Sequence[DataFrame | Series], other)) can_concat = all(df.index.is_unique for df in frames) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c2b36dab4a67e..c8d0189f1963a 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -145,7 +145,7 @@ def concat( @deprecate_nonkeyword_arguments(version=None, allowed_args=["objs"]) def concat( - objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + objs: Iterable[NDFrame | DataFrame | Series] | Mapping[HashableT, NDFrame], axis: Axis = 0, join: str = "outer", ignore_index: bool = False, From 436f5d3a8726e7237e36b05cb73029c628a6de83 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Sun, 12 Jun 2022 23:40:19 +1000 Subject: [PATCH 06/15] Fix type union syntax --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b9d41a0074d2a..d4ce76aa4a812 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9583,7 +9583,7 @@ def _join_compat( "Suffixes not supported when joining multiple DataFrames" ) - frames = [self] + list(cast(Sequence[DataFrame | Series], other)) + frames = [self] + list(cast(Sequence["DataFrame | Series"], other)) can_concat = all(df.index.is_unique for df in frames) From 76ead79b90a0379ccaf05937328ca92b5033a2f0 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Sun, 12 Jun 2022 23:42:49 +1000 Subject: [PATCH 07/15] NDFrame --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c8d0189f1963a..c2b36dab4a67e 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -145,7 +145,7 @@ def concat( @deprecate_nonkeyword_arguments(version=None, allowed_args=["objs"]) def concat( - objs: Iterable[NDFrame | DataFrame | Series] | Mapping[HashableT, NDFrame], + objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], axis: Axis = 0, join: str = "outer", ignore_index: bool = False, From b12f42e51a3793f8becdcc8f2c8f14c7dfa72e14 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Mon, 13 Jun 2022 00:04:02 +1000 Subject: [PATCH 08/15] Remove cast --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d4ce76aa4a812..2d61ca874804e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9583,7 +9583,7 @@ def _join_compat( "Suffixes not supported when joining multiple DataFrames" ) - frames = [self] + list(cast(Sequence["DataFrame | Series"], other)) + frames = [self] + list(other) can_concat = all(df.index.is_unique for df in frames) From 3b04d62ecb4bd3134c1aec2d3214882e91b6dc9a Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Wed, 15 Jun 2022 13:22:19 +1000 Subject: [PATCH 09/15] Fix mypy errors --- pandas/core/frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index eeab0cf4e4c8a..8400155d75149 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9699,7 +9699,9 @@ def _join_compat( "Suffixes not supported when joining multiple DataFrames" ) - frames = [self] + list(other) + frames = [cast("DataFrame | Series", self)] + list( + cast("Iterable[DataFrame | Series]", other) + ) can_concat = all(df.index.is_unique for df in frames) From 0a241c07410eadd211dc5eac06202a84b2bd5c2d Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Fri, 17 Jun 2022 13:05:17 +1000 Subject: [PATCH 10/15] Code review --- pandas/core/frame.py | 9 ++++----- pandas/tests/frame/methods/test_join.py | 4 +++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 70171803df8c9..fd947ff8ac4a3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9565,7 +9565,7 @@ def _append( def join( self, - other: DataFrame | Series | Iterable[DataFrame | Series], + other: DataFrame | Series | list[DataFrame | Series], on: IndexLabel | None = None, how: str = "left", lsuffix: str = "", @@ -9582,7 +9582,8 @@ def join( Parameters ---------- - other : DataFrame, Series, or list of either of these + other : DataFrame, Series, or a list containing any combination of DataFrames + and Series. Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. @@ -9787,9 +9788,7 @@ def _join_compat( "Suffixes not supported when joining multiple DataFrames" ) - frames = [cast("DataFrame | Series", self)] + list( - cast("Iterable[DataFrame | Series]", other) - ) + frames = [cast("DataFrame | Series", self)] + list(other) can_concat = all(df.index.is_unique for df in frames) diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index cb0d875123c57..2edc727d82f26 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -368,10 +368,12 @@ def test_join_left_sequence_non_unique_index(): def test_join_list_series(float_frame): + # GH#46850 + # Join a DataFrame with a list containing both a Series and a DataFrame left = float_frame.A.to_frame() right = [float_frame.B, float_frame[["C", "D"]]] result = left.join(right) - assert result.equals(float_frame) + assert tm.assert_frame_equal(result, float_frame) @pytest.mark.parametrize("sort_kw", [True, False]) From f72a5b581899207a0d3edb009bce823d29407d08 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Fri, 17 Jun 2022 14:41:09 +1000 Subject: [PATCH 11/15] Remove unnecessary assert --- pandas/tests/frame/methods/test_join.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 2edc727d82f26..7db26f7eb570b 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -373,7 +373,7 @@ def test_join_list_series(float_frame): left = float_frame.A.to_frame() right = [float_frame.B, float_frame[["C", "D"]]] result = left.join(right) - assert tm.assert_frame_equal(result, float_frame) + tm.assert_frame_equal(result, float_frame) @pytest.mark.parametrize("sort_kw", [True, False]) From 75436c810243094a2711e23785facf1ecef30830 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Mon, 18 Jul 2022 23:13:11 +1000 Subject: [PATCH 12/15] Add comment explaining the cast --- pandas/core/frame.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8894893ed1648..2f2ceadd04bd4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10011,6 +10011,10 @@ def _join_compat( "Suffixes not supported when joining multiple DataFrames" ) + # Mypy thinks the LHS is a + # "Union[DataFrame, Series, Iterable[Union[DataFrame, Series]]]" whereas + # the RHS is an "Iterable[DataFrame]", but in reality both types are + # "Iterable[Union[DataFrame, Series]]" due to the if statements frames = [cast("DataFrame | Series", self)] + list(other) can_concat = all(df.index.is_unique for df in frames) From f48213f499ca06088b1ca19786c5dc953f4f9f2a Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Mon, 18 Jul 2022 23:56:36 +1000 Subject: [PATCH 13/15] Fix swapped order of cast comment --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2f2ceadd04bd4..d388d8de72154 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10011,9 +10011,9 @@ def _join_compat( "Suffixes not supported when joining multiple DataFrames" ) - # Mypy thinks the LHS is a + # Mypy thinks the RHS is a # "Union[DataFrame, Series, Iterable[Union[DataFrame, Series]]]" whereas - # the RHS is an "Iterable[DataFrame]", but in reality both types are + # the LHS is an "Iterable[DataFrame]", but in reality both types are # "Iterable[Union[DataFrame, Series]]" due to the if statements frames = [cast("DataFrame | Series", self)] + list(other) From b79e66c25914c6fe17282bb8dd15aee8f92ad0bc Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Tue, 19 Jul 2022 10:19:22 +1000 Subject: [PATCH 14/15] Remove full stop --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d388d8de72154..dbc6012de77f9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9806,7 +9806,7 @@ def join( Parameters ---------- other : DataFrame, Series, or a list containing any combination of DataFrames - and Series. + and Series Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. From 4ba0ce09ca456e85378ea73788146528ef9b19ac Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 21 Jul 2022 12:15:02 +0700 Subject: [PATCH 15/15] Update pandas/core/frame.py --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dbc6012de77f9..6319fb82bd81c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9805,8 +9805,7 @@ def join( Parameters ---------- - other : DataFrame, Series, or a list containing any combination of DataFrames - and Series + other : DataFrame, Series, or a list containing any combination of them Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame.