From 4bf86384296bf7300ea5b50662fa724adce527c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 7 Sep 2022 17:13:34 -0700 Subject: [PATCH 1/3] DEPR: allowing unknown array-likes in merge --- pandas/core/reshape/merge.py | 20 ++++++++++++++++++++ pandas/tests/reshape/merge/test_merge.py | 21 +++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 5052c27ea47f3..7ad2b68d8f6a6 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1126,6 +1126,22 @@ def _get_merge_keys(self): is_lkey = lambda x: is_array_like(x) and len(x) == len(left) is_rkey = lambda x: is_array_like(x) and len(x) == len(right) + def deprecate_unknown_arraylike(obj): + # The existing code (but not docs) allow for any iterable object + # with a 'dtype' attribute. In the future, we want to restrict + # to specific array-like types. + if is_array_like(obj) and not isinstance( + obj, (np.ndarray, ExtensionArray, Index, ABCSeries) + ): + warnings.warn( + "In a future version, the 'on', 'left_on', and 'right_on' " + "keywords will only allow array-like objects that are one " + "of the following types: " + "numpy.ndarray, ExtensionArray, Index, Series.", + FutureWarning, + stacklevel=find_stack_level(inspect.currentframe()), + ) + # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A # user could, for example, request 'left_index' and 'left_by'. 
In a # regular pd.merge(), users cannot specify both 'left_index' and @@ -1139,6 +1155,8 @@ def _get_merge_keys(self): # ugh, spaghetti re #733 if _any(self.left_on) and _any(self.right_on): for lk, rk in zip(self.left_on, self.right_on): + deprecate_unknown_arraylike(lk) + deprecate_unknown_arraylike(rk) if is_lkey(lk): lk = cast(AnyArrayLike, lk) left_keys.append(lk) @@ -1188,6 +1206,7 @@ def _get_merge_keys(self): join_names.append(left.index.name) elif _any(self.left_on): for k in self.left_on: + deprecate_unknown_arraylike(k) if is_lkey(k): k = cast(AnyArrayLike, k) left_keys.append(k) @@ -1209,6 +1228,7 @@ def _get_merge_keys(self): right_keys = [self.right.index._values] elif _any(self.right_on): for k in self.right_on: + deprecate_unknown_arraylike(k) if is_rkey(k): k = cast(AnyArrayLike, k) right_keys.append(k) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c7d7d1b0daa50..574562f628980 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -9,6 +9,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import ( is_categorical_dtype, is_object_dtype, @@ -2691,3 +2693,22 @@ def test_merge_different_index_names(): result = merge(left, right, left_on="c", right_on="d") expected = DataFrame({"a_x": [1], "a_y": 1}) tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("dask") +def test_merge_on_arraylike_deprecation(): + # deprecate allowing non-standard array-likes for "on" + + left = DataFrame({"A": range(3), "B": range(1, 4)}) + right = DataFrame({"C": range(2, 5)}) + + import dask.array + + arr = dask.array.array([0, 1, 2]) # matches left["A"] + + msg = "will only allow array-like objects that are one of the following types" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = merge(left, right, left_on="A", right_on=arr) + + expected = merge(left, right, left_on="A", 
right_on=np.array(arr)) + tm.assert_frame_equal(res, expected) From 55ad04468a830af13f138aaac70e954a745718ac Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 12 Sep 2022 15:22:54 -0700 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.6.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index c393b8a57f805..d1c3afb354400 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -92,7 +92,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ -- +- Deprecated allowing array-like values for ``on``, ``left_on``, or ``right_on`` in :func:`merge` other than ``np.ndarray``, :class:`ExtensionArray`, :class:`Index`, and :class:`Series` (:issue:`48454`) - .. --------------------------------------------------------------------------- From 92b3ed3fb0f1aa25d17c19cbab40e5675fa125b3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 12 Sep 2022 16:47:24 -0700 Subject: [PATCH 3/3] lint fixup --- pandas/core/reshape/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 7ad2b68d8f6a6..d3863854eebd6 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1126,7 +1126,7 @@ def _get_merge_keys(self): is_lkey = lambda x: is_array_like(x) and len(x) == len(left) is_rkey = lambda x: is_array_like(x) and len(x) == len(right) - def deprecate_unknown_arraylike(obj): + def deprecate_unknown_arraylike(obj) -> None: # The existing code (but not docs) allow for any iterable object # with a 'dtype' attribute. In the future, we want to restrict # to specific array-like types.