Skip to content

BUG: Raise ValueError for non numerical join columns in merge_asof #34488

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
660d2f5
Raise ValueError for non numerical join columns in merge_asof
phofl May 30, 2020
cafedcc
Delete unused imports
phofl May 30, 2020
fa1d909
Implement review requests
phofl May 30, 2020
34a5633
Run black pandas
phofl May 30, 2020
783bc60
Change error message
phofl May 31, 2020
e086864
Change whats new entry
phofl May 31, 2020
a4001c6
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl May 31, 2020
aaa3480
Change error type
phofl May 31, 2020
ec3147e
Move dtype check
phofl May 31, 2020
129eb77
Use raw string
phofl May 31, 2020
4680833
Move check again
phofl Jun 1, 2020
fb55c9f
Move dtype check to _get_merge_keys
phofl Jun 1, 2020
72fb8b4
Run black pandas
phofl Jun 1, 2020
0103974
Overwrite _maybe_coerce_merge_keys
phofl Jun 1, 2020
4cc8bdc
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Jun 3, 2020
a6048a6
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Sep 16, 2020
cdaa8ef
Move whats new entry
phofl Sep 16, 2020
47e211c
Add docs from other pr
phofl Sep 23, 2020
3129353
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Sep 23, 2020
0242f47
Run black pandas
phofl Sep 23, 2020
ef47721
Revert "Add docs from other pr"
phofl Sep 23, 2020
109c824
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Dec 29, 2020
3a66f05
Move whatsnew
phofl Dec 29, 2020
da377c7
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Jan 1, 2021
c0ce857
Move merge asof validation
phofl Jan 4, 2021
2c164c5
Remove newline
phofl Jan 4, 2021
a00eb9c
Parametrize test further
phofl Jan 4, 2021
f62a873
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Jan 4, 2021
4782233
Add comment
phofl Jan 4, 2021
6ebb7a3
Change Error
phofl Jan 5, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,7 @@ Reshaping
- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`)
- Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`)
- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`, :issue:`38907`)
-

- :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)

Sparse
^^^^^^
Expand Down
17 changes: 17 additions & 0 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1708,6 +1708,23 @@ def _validate_specification(self):
if self.left_by is not None and self.right_by is None:
raise MergeError("missing right_by")

# GH#29130 Check that merge keys do not have dtype object
lo_dtype = (
self.left[self.left_on[0]].dtype
if not self.left_index
else self.left.index.dtype
)
ro_dtype = (
self.right[self.right_on[0]].dtype
if not self.right_index
else self.right.index.dtype
)
if is_object_dtype(lo_dtype) or is_object_dtype(ro_dtype):
raise ValueError(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you make this a MergeError (as everthing else is)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, issue mentioned ValueError, but MergeError is probably better

f"Incompatible merge dtype, {repr(ro_dtype)} and "
f"{repr(lo_dtype)}, both sides must have numeric dtype"
)

# add 'by' to our key-list so we can have it in the
# output as a key
if self.left_by is not None:
Expand Down
40 changes: 38 additions & 2 deletions pandas/tests/reshape/merge/test_merge_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -1168,12 +1168,12 @@ def test_on_float_by_int(self):
tm.assert_frame_equal(result, expected)

def test_merge_datatype_error_raises(self):
msg = r"incompatible merge keys \[0\] .*, must be the same type"
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"

left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]})
right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]})

with pytest.raises(MergeError, match=msg):
with pytest.raises(ValueError, match=msg):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert (see above)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

merge_asof(left, right, on="a")

def test_merge_datatype_categorical_error_raises(self):
Expand Down Expand Up @@ -1373,3 +1373,39 @@ def test_left_index_right_index_tolerance(self):
tolerance=Timedelta(seconds=0.5),
)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"kwargs", [{"on": "x"}, {"left_index": True, "right_index": True}]
)
@pytest.mark.parametrize(
"data",
[["2019-06-01 00:09:12", "2019-06-01 00:10:29"], [1.0, "2019-06-01 00:10:29"]],
)
def test_merge_asof_non_numerical_dtype(kwargs, data):
# GH#29130
left = pd.DataFrame({"x": data}, index=data)
right = pd.DataFrame({"x": data}, index=data)
with pytest.raises(
ValueError,
match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
):
pd.merge_asof(left, right, **kwargs)


def test_merge_asof_non_numerical_dtype_object():
# GH#29130
left = pd.DataFrame({"a": ["12", "13", "15"], "left_val1": ["a", "b", "c"]})
right = pd.DataFrame({"a": ["a", "b", "c"], "left_val": ["d", "e", "f"]})
with pytest.raises(
ValueError,
match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
):
pd.merge_asof(
left,
right,
left_on="left_val1",
right_on="a",
left_by="a",
right_by="left_val",
)