From 6551c188f60e3c3e3afc831e3341ce3134c000d5 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 26 Nov 2020 23:04:30 +0800 Subject: [PATCH 1/9] merge_ordered fails with list-like left_by --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/reshape/merge.py | 4 +-- .../tests/reshape/merge/test_merge_ordered.py | 30 +++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6f046d3a9379d..32ddff2e75a22 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -737,6 +737,7 @@ Reshaping - Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) - Bug in :func:`concat` resulting in a ``ValueError`` when at least one of both inputs had a non-unique index (:issue:`36263`) - Bug in :meth:`DataFrame.merge` and :meth:`pandas.merge` returning inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) +- Bug in :func:`merge_ordered` wasn't able to handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3b755c40721fb..7b3d68500dd98 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -140,9 +140,7 @@ def _groupby_and_merge(by, on, left: "DataFrame", right: "DataFrame", merge_piec # make sure join keys are in the merged # TODO, should merge_pieces do this? - for k in by: - if k in merged: - merged[k] = key + merged[by] = key pieces.append(merged) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 17f2f44f45fce..5df8b87202331 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -115,3 +115,33 @@ def test_doc_example(self): ) tm.assert_frame_equal(result, expected) + + def test_list_type_by(self): + # GH 35269 + left = DataFrame( + { + "G": ["g", "g"], + "H": ["h", "h"], + "T": [1, 3] + } + ) + + right = DataFrame( + { + "T": [2], + "E": [1] + } + ) + + result = merge_ordered(left, right, on=["T"], left_by=["G", "H"]) + + expected = DataFrame( + { + "G": ["g", "g", "g"], + "H": ["h", "h", "h"], + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan] + } + ) + + tm.assert_frame_equal(result, expected) From eb00e32e8898c7462317ca54b2b577ee3d8e06bf Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 26 Nov 2020 23:17:22 +0800 Subject: [PATCH 2/9] simplify df --- .../tests/reshape/merge/test_merge_ordered.py | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 5df8b87202331..cf723aa0f614b 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -118,29 +118,15 @@ def test_doc_example(self): def test_list_type_by(self): # GH 35269 - left = DataFrame( - { - "G": ["g", "g"], - "H": ["h", "h"], - "T": [1, 3] - } - ) - - right = DataFrame( - { - "T": [2], - "E": [1] - } - ) - + left = DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}) + right = DataFrame({"T": [2], "E": [1]}) result = merge_ordered(left, right, on=["T"], left_by=["G", "H"]) - expected = DataFrame( { - "G": ["g", "g", "g"], - "H": ["h", "h", "h"], + "G": ["g"] * 3, + "H": ["h"] * 3, "T": [1, 2, 3], - "E": [np.nan, 1.0, np.nan] + "E": [np.nan, 1.0, np.nan], } ) From 7fabf459cbb6491e63fd82ed5327d12109c18045 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 10:30:58 +0800 Subject: [PATCH 3/9] add test cases --- .../tests/reshape/merge/test_merge_ordered.py | 42 +++++++++++++------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index cf723aa0f614b..f2f84909e83b1 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -118,16 +118,32 @@ def test_doc_example(self): def test_list_type_by(self): # GH 35269 - left = DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}) - right = DataFrame({"T": [2], "E": [1]}) - result = merge_ordered(left, right, on=["T"], left_by=["G", "H"]) - expected = DataFrame( - { - "G": ["g"] * 3, - "H": ["h"] * 3, - "T": [1, 2, 3], - "E": [np.nan, 1.0, np.nan], - } - ) - - tm.assert_frame_equal(result, expected) + left = pd.DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}) + right = pd.DataFrame({"T": [2], "E": [1]}) + result1 = pd.merge_ordered(left, right, on=["T"], left_by=["G", "H"]) + result2 = pd.merge_ordered(left, right, on="T", left_by=["G", "H"]) + + expected = pd.DataFrame( + { + "G": ["g"] * 3, + "H": ["h"] * 3, + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + } + ) + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + result3 = pd.merge_ordered(right, left, on=["T"], right_by=["G", "H"]) + + expected = pd.DataFrame( + { + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + "G": ["g"] * 3, + "H": ["h"] * 3, + } + ) + + tm.assert_frame_equal(result3, expected) From 5190e5176d4fbe674c508488d2404da4c849d79c Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 10:34:21 +0800 Subject: [PATCH 4/9] fix format --- .../tests/reshape/merge/test_merge_ordered.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index f2f84909e83b1..04aec7634fded 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -124,13 +124,13 @@ def test_list_type_by(self): result2 = pd.merge_ordered(left, right, on="T", left_by=["G", "H"]) expected = pd.DataFrame( - { - "G": ["g"] * 3, - "H": ["h"] * 3, - "T": [1, 2, 3], - "E": [np.nan, 1.0, np.nan], - } - ) + { + "G": ["g"] * 3, + "H": ["h"] * 3, + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + } + ) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) @@ -138,12 +138,12 @@ def test_list_type_by(self): result3 = pd.merge_ordered(right, left, on=["T"], right_by=["G", "H"]) expected = pd.DataFrame( - { - "T": [1, 2, 3], - "E": [np.nan, 1.0, np.nan], - "G": ["g"] * 3, - "H": ["h"] * 3, - } - ) + { + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + "G": ["g"] * 3, + "H": ["h"] * 3, + } + ) tm.assert_frame_equal(result3, expected) From 679f9b7beef053937750d02eaa7050e5065b8d87 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 10:46:17 +0800 Subject: [PATCH 5/9] Update test_merge_ordered.py --- pandas/tests/reshape/merge/test_merge_ordered.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 04aec7634fded..380cedbc16b31 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -118,12 +118,12 @@ def test_doc_example(self): def test_list_type_by(self): # GH 35269 - left = pd.DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}) - right = pd.DataFrame({"T": [2], "E": [1]}) - result1 = pd.merge_ordered(left, right, on=["T"], left_by=["G", "H"]) - result2 = pd.merge_ordered(left, right, on="T", left_by=["G", "H"]) + left = DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}) + right = DataFrame({"T": [2], "E": [1]}) + result1 = merge_ordered(left, right, on=["T"], left_by=["G", "H"]) + result2 = merge_ordered(left, right, on="T", left_by=["G", "H"]) - expected = pd.DataFrame( + expected = DataFrame( { "G": ["g"] * 3, "H": ["h"] * 3, @@ -135,9 +135,9 @@ def test_list_type_by(self): tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) - result3 = pd.merge_ordered(right, left, on=["T"], right_by=["G", "H"]) + result3 = merge_ordered(right, left, on=["T"], right_by=["G", "H"]) - expected = pd.DataFrame( + expected = DataFrame( { "T": [1, 2, 3], "E": [np.nan, 1.0, np.nan], From 6ad516a696ccdb326e7f5bf12ca95a10139f7fb4 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 11:54:57 +0800 Subject: [PATCH 6/9] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 32ddff2e75a22..6ab9f9a58cb0c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -737,7 +737,7 @@ Reshaping - Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) - Bug in :func:`concat` resulting in a ``ValueError`` when at least one of both inputs had a non-unique index (:issue:`36263`) - Bug in :meth:`DataFrame.merge` and :meth:`pandas.merge` returning inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) -- Bug in :func:`merge_ordered` wasn't able to handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) +- Bug in :func:`merge_ordered` couldn't handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) Sparse ^^^^^^ From 45ce9461f5fde3e85e7c2f41e607ba7b9dc51891 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Sat, 28 Nov 2020 19:45:01 +0800 Subject: [PATCH 7/9] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6ab9f9a58cb0c..32ddff2e75a22 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -737,7 +737,7 @@ Reshaping - Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) - Bug in :func:`concat` resulting in a ``ValueError`` when at least one of both inputs had a non-unique index (:issue:`36263`) - Bug in :meth:`DataFrame.merge` and :meth:`pandas.merge` returning inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) -- Bug in :func:`merge_ordered` couldn't handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) +- Bug in :func:`merge_ordered` wasn't able to handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) Sparse ^^^^^^ From 443436ba41c7d24107df58280c151f21d5f3266a Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Sun, 29 Nov 2020 14:47:29 +0800 Subject: [PATCH 8/9] Update test_merge_ordered.py --- .../tests/reshape/merge/test_merge_ordered.py | 86 +++++++++++++------ 1 file changed, 58 insertions(+), 28 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 380cedbc16b31..8389a6bb9be10 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -116,34 +116,64 @@ def test_doc_example(self): tm.assert_frame_equal(result, expected) - def test_list_type_by(self): + @pytest.mark.parametrize( + "left, right, on, left_by, right_by, expected", + [ + ( + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + DataFrame({"T": [2], "E": [1]}), + ["T"], + ["G", "H"], + None, + DataFrame( + { + "G": ["g"] * 3, + "H": ["h"] * 3, + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + } + ), + ), + ( + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + DataFrame({"T": [2], "E": [1]}), + "T", + ["G", "H"], + None, + DataFrame( + { + "G": ["g"] * 3, + "H": ["h"] * 3, + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + } + ), + ), + ( + DataFrame({"T": [2], "E": [1]}), + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + ["T"], + None, + ["G", "H"], + DataFrame( + { + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + "G": ["g"] * 3, + "H": ["h"] * 3, + } + ), + ), + ], + ) + def test_list_type_by(self, left, right, on, left_by, right_by, expected): # GH 35269 - left = DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}) - right = DataFrame({"T": [2], "E": [1]}) - result1 = merge_ordered(left, right, on=["T"], left_by=["G", "H"]) - result2 = merge_ordered(left, right, on="T", left_by=["G", "H"]) - - expected = DataFrame( - { - "G": ["g"] * 3, - "H": ["h"] * 3, - "T": [1, 2, 3], - "E": [np.nan, 1.0, np.nan], - } - ) - - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - - result3 = merge_ordered(right, left, on=["T"], right_by=["G", "H"]) - - expected = DataFrame( - { - "T": [1, 2, 3], - "E": [np.nan, 1.0, np.nan], - "G": ["g"] * 3, - "H": ["h"] * 3, - } + result = merge_ordered( + left=left, + right=right, + on=on, + left_by=left_by, + right_by=right_by, ) - tm.assert_frame_equal(result3, expected) + tm.assert_frame_equal(result, expected) From 20edd6030fc4890cbf85e5eb01229789da7aafa8 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Sun, 29 Nov 2020 17:11:24 +0800 Subject: [PATCH 9/9] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 32ddff2e75a22..6ab9f9a58cb0c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -737,7 +737,7 @@ Reshaping - Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) - Bug in :func:`concat` resulting in a ``ValueError`` when at least one of both inputs had a non-unique index (:issue:`36263`) - Bug in :meth:`DataFrame.merge` and :meth:`pandas.merge` returning inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) -- Bug in :func:`merge_ordered` wasn't able to handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) +- Bug in :func:`merge_ordered` couldn't handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) Sparse ^^^^^^