|
26 | 26 | Interval, IntervalIndex)
|
27 | 27 | from pandas.compat import range, lrange, u, PY3, PYPY
|
28 | 28 | from pandas.core.config import option_context
|
| 29 | +from pandas.core.categorical import _recode_for_categories |
29 | 30 |
|
30 | 31 |
|
31 | 32 | class TestCategorical(object):
|
@@ -963,6 +964,68 @@ def test_rename_categories(self):
|
963 | 964 | with pytest.raises(ValueError):
|
964 | 965 | cat.rename_categories([1, 2])
|
965 | 966 |
|
@pytest.mark.parametrize('codes, old, new, expected', [
    # Same categories on both sides: codes are expected to be unchanged.
    ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]),
    ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]),
    # Same categories in swapped order: codes are expected to swap.
    ([0, 1], ['a', 'b'], ['b', 'a'], [1, 0]),
    ([0, 1], ['b', 'a'], ['a', 'b'], [1, 0]),
    # New categories are a superset of the old ones.
    ([0, 1, 0, 1], ['a', 'b'], ['a', 'b', 'c'], [0, 1, 0, 1]),
    # A category missing from ``new`` is expected to become -1.
    ([0, 1, 2, 2], ['a', 'b', 'c'], ['a', 'b'], [0, 1, -1, -1]),
    # An input code of -1 is expected to stay -1.
    ([0, 1, -1], ['a', 'b', 'c'], ['a', 'b', 'c'], [0, 1, -1]),
    ([0, 1, -1], ['a', 'b', 'c'], ['b'], [-1, 0, -1]),
    # No overlap, or empty categories on either side.
    ([0, 1, -1], ['a', 'b', 'c'], ['d'], [-1, -1, -1]),
    ([0, 1, -1], ['a', 'b', 'c'], [], [-1, -1, -1]),
    ([-1, -1], [], ['a', 'b'], [-1, -1]),
    ([1, 0], ['b', 'a'], ['a', 'b'], [0, 1]),
])
def test_recode_to_categories(self, codes, old, new, expected):
    """Check ``_recode_for_categories`` against a table of small cases.

    Each case supplies ``codes`` referring to positions in ``old`` and
    the codes expected once they refer to positions in ``new``.  Both
    the input codes and the expected codes are built as int8 arrays.
    """
    old_categories = Index(old)
    new_categories = Index(new)
    input_codes = np.asanyarray(codes, dtype=np.int8)
    expected_codes = np.asanyarray(expected, dtype=np.int8)

    recoded = _recode_for_categories(input_codes, old_categories,
                                     new_categories)

    tm.assert_numpy_array_equal(recoded, expected_codes)
| 988 | + |
def test_recode_to_categories_large(self):
    """Recode 1000 codes onto the same categories in reversed order."""
    size = 1000
    codes = np.arange(size)
    # The new categories are the old ones reversed, so code ``i`` is
    # expected to become ``size - 1 - i``.  The expected array is int16;
    # 1000 distinct codes do not fit in int8.
    expected = np.arange(size - 1, -1, -1, dtype=np.int16)
    old_categories = Index(codes)
    new_categories = Index(expected)

    result = _recode_for_categories(codes, old_categories, new_categories)

    tm.assert_numpy_array_equal(result, expected)
| 997 | + |
| 998 | + |
@pytest.mark.parametrize('values, categories, new_categories', [
    # No NaNs, same cats, same order
    (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
    # No NaNs, same cats, different order
    (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
    # Unsorted values, same cats, same order
    (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
    # Unsorted values, same cats, different order
    (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
    # NaNs ('c' is not among the original categories)
    (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
    (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
    (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
    # Was an exact duplicate of the previous case; now covers the
    # unsorted-values / reordered-categories combination.
    (['b', 'a', 'c'], ['a', 'b'], ['b', 'a']),
    # Introduce NaNs (a category is dropped)
    (['a', 'b', 'c'], ['a', 'b'], ['a']),
    (['a', 'b', 'c'], ['a', 'b'], ['b']),
    (['b', 'a', 'c'], ['a', 'b'], ['a']),
    # Was an exact duplicate of the previous case; now covers dropping
    # 'a' with unsorted values.
    (['b', 'a', 'c'], ['a', 'b'], ['b']),
    # No overlap
    (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
])
@pytest.mark.parametrize('ordered', [True, False])
def test_set_categories_many(self, values, categories, new_categories,
                             ordered):
    """Compare ``set_categories`` with direct construction.

    A ``Categorical`` built from ``values`` with ``categories`` and then
    passed through ``set_categories(new_categories, ordered=ordered)``
    must equal a ``Categorical`` built directly from ``values`` with
    ``new_categories`` and the same ``ordered`` flag.
    """
    c = Categorical(values, categories=categories)
    # Keyword arguments keep the call independent of the positional
    # order of the constructor's parameters.
    expected = Categorical(values, categories=new_categories,
                           ordered=ordered)
    result = c.set_categories(new_categories, ordered=ordered)
    tm.assert_categorical_equal(result, expected)
| 1028 | + |
966 | 1029 | def test_reorder_categories(self):
|
967 | 1030 | cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
968 | 1031 | old = cat.copy()
|
|
0 commit comments