|
2 | 2 | import pytest
|
3 | 3 |
|
4 | 4 | import pandas as pd
|
5 |
| -from pandas import DataFrame, Index, MultiIndex, Series, concat, merge |
| 5 | +from pandas import ( |
| 6 | + Categorical, |
| 7 | + DataFrame, |
| 8 | + Index, |
| 9 | + MultiIndex, |
| 10 | + Series, |
| 11 | + Timestamp, |
| 12 | + concat, |
| 13 | + merge, |
| 14 | +) |
6 | 15 | import pandas._testing as tm
|
7 | 16 | from pandas.tests.reshape.merge.test_merge import NGROUPS, N, get_test_data
|
8 | 17 |
|
@@ -693,8 +702,8 @@ def test_join_datetime_string(self):
|
693 | 702 | result = dfb.join(dfa.set_index(["x", "y"]), on=["x", "y"])
|
694 | 703 | expected = DataFrame(
|
695 | 704 | [
|
696 |
| - [pd.Timestamp("2012-08-02 00:00:00"), "J", 1, 15], |
697 |
| - [pd.Timestamp("2013-04-06 00:00:00"), "L", 2, 20], |
| 705 | + [Timestamp("2012-08-02 00:00:00"), "J", 1, 15], |
| 706 | + [Timestamp("2013-04-06 00:00:00"), "L", 2, 20], |
698 | 707 | ],
|
699 | 708 | index=[2, 4],
|
700 | 709 | columns=["x", "y", "z", "a"],
|
@@ -831,3 +840,44 @@ def test_join_multiindex_one_level(join_type):
|
831 | 840 | index=pd.MultiIndex.from_tuples([(2, 1)], names=["b", "a"]),
|
832 | 841 | )
|
833 | 842 | tm.assert_frame_equal(result, expected)
|
| 843 | + |
| 844 | + |
@pytest.mark.parametrize(
    "categories, values",
    [
        (["Y", "X"], ["Y", "X", "X"]),
        ([2, 1], [2, 1, 1]),
        ([2.5, 1.5], [2.5, 1.5, 1.5]),
        (
            [Timestamp("2020-12-31"), Timestamp("2019-12-31")],
            [Timestamp("2020-12-31"), Timestamp("2019-12-31"), Timestamp("2019-12-31")],
        ),
    ],
)
def test_join_multiindex_not_alphabetical_categorical(categories, values):
    """
    Join two frames indexed by ("first", "second") where "second" is categorical.

    The categories are passed in the order given by ``categories`` (not sorted).
    The left frame has one row per category under "A"; the right frame has the
    rows described by ``values`` under "A", "A", "B". The joined result is
    expected to keep the left index and carry right values 3 and 4.
    """
    # GH#38502

    def _indexed(frame):
        # Both inputs and the expected frame share the same two-level index.
        return frame.set_index(["first", "second"])

    lhs_second = Categorical(categories, categories=categories)
    lhs = _indexed(
        DataFrame({"first": ["A", "A"], "second": lhs_second, "value": [1, 2]})
    )

    rhs_second = Categorical(values, categories=categories)
    rhs = _indexed(
        DataFrame({"first": ["A", "A", "B"], "second": rhs_second, "value": [3, 4, 5]})
    )

    # Overlapping "value" columns are disambiguated by the suffixes.
    result = lhs.join(rhs, lsuffix="_left", rsuffix="_right")

    expected_columns = {
        "first": ["A", "A"],
        "second": Categorical(categories, categories=categories),
        "value_left": [1, 2],
        "value_right": [3, 4],
    }
    expected = _indexed(DataFrame(expected_columns))

    tm.assert_frame_equal(result, expected)
0 commit comments