Skip to content

Commit cbe8b7f

Browse files
svenharris authored and tm9k1 committed
Merge asof with nans (pandas-dev#23190)
1 parent 8b64d56 commit cbe8b7f

File tree

3 files changed

+37
-5
lines changed

3 files changed

+37
-5
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,7 @@ Reshaping
973973
- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)
974974
- Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`)
975975
- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`)
976+
- Bug in :func:`merge_asof` where a confusing error message was raised when attempting to merge with missing values (:issue:`23189`)
976977

977978
.. _whatsnew_0240.bug_fixes.sparse:
978979

pandas/core/reshape/merge.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
ensure_float64,
3737
ensure_object,
3838
_get_dtype)
39-
from pandas.core.dtypes.missing import na_value_for_dtype
39+
from pandas.core.dtypes.missing import na_value_for_dtype, isnull
4040
from pandas.core.internals import (items_overlap_with_suffix,
4141
concatenate_block_managers)
4242
from pandas.util._decorators import Appender, Substitution
@@ -1390,12 +1390,21 @@ def flip(xs):
13901390
self.right_join_keys[-1])
13911391
tolerance = self.tolerance
13921392

1393-
# we required sortedness in the join keys
1394-
msg = "{side} keys must be sorted"
1393+
# we require sortedness and non-missingness in the join keys
1394+
msg_sorted = "{side} keys must be sorted"
1395+
msg_missings = "Merge keys contain null values on {side} side"
1396+
13951397
if not Index(left_values).is_monotonic:
1396-
raise ValueError(msg.format(side='left'))
1398+
if isnull(left_values).sum() > 0:
1399+
raise ValueError(msg_missings.format(side='left'))
1400+
else:
1401+
raise ValueError(msg_sorted.format(side='left'))
1402+
13971403
if not Index(right_values).is_monotonic:
1398-
raise ValueError(msg.format(side='right'))
1404+
if isnull(right_values).sum() > 0:
1405+
raise ValueError(msg_missings.format(side='right'))
1406+
else:
1407+
raise ValueError(msg_sorted.format(side='right'))
13991408

14001409
# initial type conversion as needed
14011410
if needs_i8_conversion(left_values):

pandas/tests/reshape/merge/test_merge_asof.py

+22
Original file line numberDiff line numberDiff line change
@@ -1007,3 +1007,25 @@ def test_merge_datatype_error(self):
10071007

10081008
with tm.assert_raises_regex(MergeError, msg):
10091009
merge_asof(left, right, on='a')
1010+
1011+
def test_merge_on_nans_int(self):
1012+
# 23189
1013+
msg = "Merge keys contain null values on left side"
1014+
left = pd.DataFrame({'a': [1.0, 5.0, 10.0, 12.0, np.nan],
1015+
'left_val': ['a', 'b', 'c', 'd', 'e']})
1016+
right = pd.DataFrame({'a': [1.0, 5.0, 10.0, 12.0],
1017+
'right_val': [1, 6, 11, 15]})
1018+
1019+
with tm.assert_raises_regex(ValueError, msg):
1020+
merge_asof(left, right, on='a')
1021+
1022+
def test_merge_on_nans_datetime(self):
1023+
# 23189
1024+
msg = "Merge keys contain null values on right side"
1025+
left = pd.DataFrame({"a": pd.date_range('20130101', periods=5)})
1026+
date_vals = pd.date_range('20130102', periods=5)\
1027+
.append(pd.Index([None]))
1028+
right = pd.DataFrame({"a": date_vals})
1029+
1030+
with tm.assert_raises_regex(ValueError, msg):
1031+
merge_asof(left, right, on='a')

0 commit comments

Comments
 (0)