From f52f278dcc1e85a6703849289aeb75423b78abef Mon Sep 17 00:00:00 2001 From: Ziad Kermadi Date: Sun, 12 Nov 2023 15:23:05 +0100 Subject: [PATCH 1/7] Fixes #55884- Added else for invalid fill_method --- pandas/core/reshape/merge.py | 7 +++++- .../tests/reshape/merge/test_merge_ordered.py | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f83a12b268b22..c35c2ccd1a5e7 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1863,9 +1863,14 @@ def get_result(self, copy: bool | None = True) -> DataFrame: right_indexer = cast("npt.NDArray[np.intp]", right_indexer) left_join_indexer = libjoin.ffill_indexer(left_indexer) right_join_indexer = libjoin.ffill_indexer(right_indexer) - else: + elif self.fill_method is None: left_join_indexer = left_indexer right_join_indexer = right_indexer + else: + raise ValueError( + f"fill_method must be one of ['ffill']. " + f"Got '{self.fill_method}' instead." + ) result = self._reindex_and_concat( join_index, left_join_indexer, right_join_indexer, copy=copy diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index cfb4e92fb45cd..50c586cb37373 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -209,3 +211,24 @@ def test_elements_not_in_by_but_in_df(self): msg = r"\{'h'\} not found in left columns" with pytest.raises(KeyError, match=msg): merge_ordered(left, right, on="E", left_by=["G", "h"]) + + def test_ffill_5584(self): + # GH 5584 + df1 = DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2, 3]}) + df2 = DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [4, 5, 6, 7]}) + + for valid_method in ["ffill", None]: + try: + merge_ordered(df1, df2, on="key", fill_method=valid_method) + except Exception: + pytest.fail(f"Unexpected error with valid fill_method: {valid_method}") + + for invalid_method in ["linear", "carrot"]: + with pytest.raises( + ValueError, + match=re.escape( + "fill_method must be one of ['ffill']. " + f"Got '{invalid_method}' instead." + ), + ): + merge_ordered(df1, df2, on="key", fill_method=invalid_method) From 0b9911409f5f7f29c05c13c2401545aa9108c7e0 Mon Sep 17 00:00:00 2001 From: Ziad Kermadi Date: Sun, 12 Nov 2023 15:23:05 +0100 Subject: [PATCH 2/7] Fixes #55884- Added else for invalid fill_method --- pandas/core/reshape/merge.py | 7 +++++- .../tests/reshape/merge/test_merge_ordered.py | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f83a12b268b22..c35c2ccd1a5e7 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1863,9 +1863,14 @@ def get_result(self, copy: bool | None = True) -> DataFrame: right_indexer = cast("npt.NDArray[np.intp]", right_indexer) left_join_indexer = libjoin.ffill_indexer(left_indexer) right_join_indexer = libjoin.ffill_indexer(right_indexer) - else: + elif self.fill_method is None: left_join_indexer = left_indexer right_join_indexer = right_indexer + else: + raise ValueError( + f"fill_method must be one of ['ffill']. " + f"Got '{self.fill_method}' instead." + ) result = self._reindex_and_concat( join_index, left_join_indexer, right_join_indexer, copy=copy diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index cfb4e92fb45cd..50c586cb37373 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -209,3 +211,24 @@ def test_elements_not_in_by_but_in_df(self): msg = r"\{'h'\} not found in left columns" with pytest.raises(KeyError, match=msg): merge_ordered(left, right, on="E", left_by=["G", "h"]) + + def test_ffill_5584(self): + # GH 5584 + df1 = DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2, 3]}) + df2 = DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [4, 5, 6, 7]}) + + for valid_method in ["ffill", None]: + try: + merge_ordered(df1, df2, on="key", fill_method=valid_method) + except Exception: + pytest.fail(f"Unexpected error with valid fill_method: {valid_method}") + + for invalid_method in ["linear", "carrot"]: + with pytest.raises( + ValueError, + match=re.escape( + "fill_method must be one of ['ffill']. " + f"Got '{invalid_method}' instead." + ), + ): + merge_ordered(df1, df2, on="key", fill_method=invalid_method) From 36b1c2567f339111f99aeba70217764d77ffffa6 Mon Sep 17 00:00:00 2001 From: Ziad Kermadi Date: Mon, 13 Nov 2023 21:02:26 +0100 Subject: [PATCH 3/7] Update pandas/tests/reshape/merge/test_merge_ordered.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/reshape/merge/test_merge_ordered.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 50c586cb37373..0aec5309483fe 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -212,7 +212,7 @@ def test_elements_not_in_by_but_in_df(self): with pytest.raises(KeyError, match=msg): merge_ordered(left, right, on="E", left_by=["G", "h"]) - def test_ffill_5584(self): + def test_ffill_validate_fill_method(self): # GH 5584 df1 = DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2, 3]}) df2 = DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [4, 5, 6, 7]}) From 534521e7dc1cba0fb5cf2bed74dc776961bf35e3 Mon Sep 17 00:00:00 2001 From: Ziad Kermadi Date: Mon, 13 Nov 2023 21:02:35 +0100 Subject: [PATCH 4/7] Update pandas/tests/reshape/merge/test_merge_ordered.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/reshape/merge/test_merge_ordered.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 0aec5309483fe..fda4a2eb91fe1 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -213,7 +213,7 @@ def test_elements_not_in_by_but_in_df(self): merge_ordered(left, right, on="E", left_by=["G", "h"]) def test_ffill_validate_fill_method(self): - # GH 5584 + # GH 55884 df1 = DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2, 3]}) df2 = DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [4, 5, 6, 7]}) From dd54596088e06e1f8070946016bc40d49470acb6 Mon Sep 17 00:00:00 2001 From: Ziad Kermadi Date: Mon, 13 Nov 2023 21:02:56 +0100 Subject: [PATCH 5/7] Update pandas/core/reshape/merge.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/core/reshape/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c35c2ccd1a5e7..103bac557d8ab 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1868,7 +1868,7 @@ def get_result(self, copy: bool | None = True) -> DataFrame: right_join_indexer = right_indexer else: raise ValueError( - f"fill_method must be one of ['ffill']. " + "fill_method must be 'ffill' or None" f"Got '{self.fill_method}' instead." ) From c9feabc11d9be2a66be6d862f8518bf090d5cac9 Mon Sep 17 00:00:00 2001 From: Ziad Kermadi Date: Mon, 13 Nov 2023 21:36:43 +0100 Subject: [PATCH 6/7] Fixed mergeordered ValueError Message --- pandas/core/reshape/merge.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 103bac557d8ab..f8575b1b53908 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1867,10 +1867,7 @@ def get_result(self, copy: bool | None = True) -> DataFrame: left_join_indexer = left_indexer right_join_indexer = right_indexer else: - raise ValueError( - "fill_method must be 'ffill' or None" - f"Got '{self.fill_method}' instead." - ) + raise ValueError("fill_method must be 'ffill' or None") result = self._reindex_and_concat( join_index, left_join_indexer, right_join_indexer, copy=copy From 4e003ac37db600e0ed7fd339af12ec780652f640 Mon Sep 17 00:00:00 2001 From: Ziad Kermadi Date: Mon, 13 Nov 2023 21:49:35 +0100 Subject: [PATCH 7/7] Use pytest fixtures & parametrize for tests --- .../tests/reshape/merge/test_merge_ordered.py | 25 +++++-------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index fda4a2eb91fe1..abd61026b4e37 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -212,23 +212,10 @@ def test_elements_not_in_by_but_in_df(self): with pytest.raises(KeyError, match=msg): merge_ordered(left, right, on="E", left_by=["G", "h"]) - def test_ffill_validate_fill_method(self): + @pytest.mark.parametrize("invalid_method", ["linear", "carrot"]) + def test_ffill_validate_fill_method(self, left, right, invalid_method): # GH 55884 - df1 = DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2, 3]}) - df2 = DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [4, 5, 6, 7]}) - - for valid_method in ["ffill", None]: - try: - merge_ordered(df1, df2, on="key", fill_method=valid_method) - except Exception: - pytest.fail(f"Unexpected error with valid fill_method: {valid_method}") - - for invalid_method in ["linear", "carrot"]: - with pytest.raises( - ValueError, - match=re.escape( - "fill_method must be one of ['ffill']. " - f"Got '{invalid_method}' instead." - ), - ): - merge_ordered(df1, df2, on="key", fill_method=invalid_method) + with pytest.raises( + ValueError, match=re.escape("fill_method must be 'ffill' or None") + ): + merge_ordered(left, right, on="key", fill_method=invalid_method)