Skip to content

Commit 71dcb80

Browse files
committed
Work in progress for new test for unordered categoricals
1 parent 919f42b commit 71dcb80

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

pandas/core/reshape/merge.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -1443,8 +1443,16 @@ def _get_merge_keys(self):
14431443
right_join_keys,
14441444
join_names) = super()._get_merge_keys()
14451445

1446-
# validate index types are the same
1446+
# validate that index types are the same and are not unordered categoricals
14471447
for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)):
1448+
# TODO: this check is in the wrong place. By the time we get here, the "by"
1449+
# keys (which *can* be unordered) have already been added to the join keys.
1450+
if any(is_categorical_dtype(dtype) and not dtype.ordered for
1451+
dtype in [lk, rk]):
1452+
raise MergeError("incompatible merge keys [{i}] unordered "
1453+
"category, must be ordered".format(i=i))
1454+
1455+
14481456
if not is_dtype_equal(lk.dtype, rk.dtype):
14491457
if (is_categorical_dtype(lk.dtype) and
14501458
is_categorical_dtype(rk.dtype)):

pandas/tests/reshape/merge/test_merge_asof.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -1008,10 +1008,23 @@ def test_merge_datatype_error_raises(self):
10081008
def test_merge_datatype_categorical_error_raises(self):
10091009
msg = r'merge keys \[0\] both sides category, but not equal ones'
10101010

1011+
left = pd.DataFrame({'left_val': [1, 5, 10],
1012+
'a': pd.Categorical(['a', 'b', 'c'],
1013+
ordered=True)})
1014+
right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7],
1015+
'a': pd.Categorical(['a', 'X', 'c', 'X', 'b'],
1016+
ordered=True)})
1017+
1018+
with pytest.raises(MergeError, match=msg):
1019+
merge_asof(left, right, on='a')
1020+
1021+
def test_merge_datatype_unordered_categorical_raises(self):
1022+
msg = r'merge keys \[0\] unordered category, must be ordered'
1023+
10111024
left = pd.DataFrame({'left_val': [1, 5, 10],
10121025
'a': pd.Categorical(['a', 'b', 'c'])})
10131026
right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7],
1014-
'a': pd.Categorical(['a', 'X', 'c', 'X', 'b'])})
1027+
'a': pd.Categorical(['a', 'c', 'b', 'a', 'b'])})
10151028

10161029
with pytest.raises(MergeError, match=msg):
10171030
merge_asof(left, right, on='a')

0 commit comments

Comments
 (0)