|
9 | 9 | from pandas import (DataFrame, concat,
|
10 | 10 | read_csv, isnull, Series, date_range,
|
11 | 11 | Index, Panel, MultiIndex, Timestamp,
|
12 |
| - DatetimeIndex) |
| 12 | + DatetimeIndex, Categorical) |
| 13 | +from pandas.types.concat import union_categoricals |
13 | 14 | from pandas.util import testing as tm
|
14 | 15 | from pandas.util.testing import (assert_frame_equal,
|
15 | 16 | makeCustomDataframe as mkdf,
|
@@ -919,6 +920,54 @@ def test_concat_keys_with_none(self):
|
919 | 920 | keys=['b', 'c', 'd', 'e'])
|
920 | 921 | tm.assert_frame_equal(result, expected)
|
921 | 922 |
|
def test_union_categorical(self):
    """Check ``union_categoricals`` against expected unions and errors.

    Covers string, integer, float, datetime (naive and tz-aware) and
    period values, the order of the resulting categories, and the
    inputs that are expected to raise.
    """
    # GH 13361
    # Each case: (first values, second values, values of expected union).
    cases = [
        (list('abc'), list('abd'), list('abcabd')),
        ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
        ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),

        (pd.date_range('2014-01-01', '2014-01-05'),
         pd.date_range('2014-01-06', '2014-01-07'),
         pd.date_range('2014-01-01', '2014-01-07')),

        (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'),
         pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'),
         pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')),

        (pd.period_range('2014-01-01', '2014-01-05'),
         pd.period_range('2014-01-06', '2014-01-07'),
         pd.period_range('2014-01-01', '2014-01-07')),
    ]

    for left_values, right_values, union_values in cases:
        pieces = [Categorical(left_values), Categorical(right_values)]
        unioned = union_categoricals(pieces)
        tm.assert_categorical_equal(unioned, Categorical(union_values),
                                    check_category_order=True)

    # new categories ordered by appearance
    first = Categorical(['x', 'y', 'z'])
    second = Categorical(['a', 'b', 'c'])
    unioned_categories = union_categoricals([first, second]).categories
    tm.assert_index_equal(unioned_categories,
                          Index(['x', 'y', 'z', 'a', 'b', 'c']))

    # can't be ordered
    ordered_pair = [Categorical([0, 1.2, 2], ordered=True),
                    Categorical([0, 1.2, 2], ordered=True)]
    with tm.assertRaises(TypeError):
        union_categoricals(ordered_pair)

    # must exactly match types
    mismatched_pair = [Categorical([0, 1.2, 2]), Categorical([2, 3, 4])]
    with tm.assertRaises(TypeError):
        union_categoricals(mismatched_pair)

    # an empty list of categoricals is expected to be rejected
    with tm.assertRaises(ValueError):
        union_categoricals([])
| 970 | + |
922 | 971 | def test_concat_bug_1719(self):
|
923 | 972 | ts1 = tm.makeTimeSeries()
|
924 | 973 | ts2 = tm.makeTimeSeries()[::2]
|
|
0 commit comments