|
26 | 26 | Interval, IntervalIndex)
|
27 | 27 | from pandas.compat import range, lrange, u, PY3, PYPY
|
28 | 28 | from pandas.core.config import option_context
|
| 29 | +from pandas.core.categorical import _recode_for_categories |
29 | 30 |
|
30 | 31 |
|
31 | 32 | class TestCategorical(object):
|
@@ -1063,6 +1064,67 @@ def test_rename_categories(self):
|
1063 | 1064 | with pytest.raises(ValueError):
|
1064 | 1065 | cat.rename_categories([1, 2])
|
1065 | 1066 |
|
@pytest.mark.parametrize('codes, old, new, expected', [
    # Identical old and new categories: codes are unchanged.
    ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]),
    ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]),
    # Same categories in swapped order: codes are swapped too.
    ([0, 1], ['a', 'b'], ['b', 'a'], [1, 0]),
    ([0, 1], ['b', 'a'], ['a', 'b'], [1, 0]),
    # New categories add an entry the codes never reference.
    ([0, 1, 0, 1], ['a', 'b'], ['a', 'b', 'c'], [0, 1, 0, 1]),
    # A referenced category is dropped: its codes become -1.
    ([0, 1, 2, 2], ['a', 'b', 'c'], ['a', 'b'], [0, 1, -1, -1]),
    # Input already contains -1; it stays -1 in every case below.
    ([0, 1, -1], ['a', 'b', 'c'], ['a', 'b', 'c'], [0, 1, -1]),
    ([0, 1, -1], ['a', 'b', 'c'], ['b'], [-1, 0, -1]),
    ([0, 1, -1], ['a', 'b', 'c'], ['d'], [-1, -1, -1]),
    ([0, 1, -1], ['a', 'b', 'c'], [], [-1, -1, -1]),
    # Empty old categories.
    ([-1, -1], [], ['a', 'b'], [-1, -1]),
    # Swapped categories with the codes listed in descending order.
    ([1, 0], ['b', 'a'], ['a', 'b'], [0, 1]),
])
def test_recode_to_categories(self, codes, old, new, expected):
    """Check ``_recode_for_categories`` against hand-written codes.

    ``codes`` index into the ``old`` categories; ``expected`` holds the
    same entries re-expressed as positions in ``new``. Both code arrays
    are built as int8 before the call and the comparison.
    """
    old_categories, new_categories = Index(old), Index(new)
    recoded = _recode_for_categories(
        np.asanyarray(codes, dtype=np.int8),
        old_categories,
        new_categories)
    tm.assert_numpy_array_equal(
        recoded, np.asanyarray(expected, dtype=np.int8))
| 1088 | + |
def test_recode_to_categories_large(self):
    """Recode 1000 integer categories into their reversed order.

    The old categories are ``0..999`` and the new categories are the same
    values descending, so the result is compared against the descending
    int16 array itself.
    """
    n_categories = 1000
    ascending = np.arange(n_categories)
    descending = np.arange(n_categories - 1, -1, -1, dtype=np.int16)
    recoded = _recode_for_categories(
        ascending, Index(ascending), Index(descending))
    tm.assert_numpy_array_equal(recoded, descending)
| 1097 | + |
@pytest.mark.parametrize('values, categories, new_categories', [
    # No NaNs, same cats, same order
    (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
    # No NaNs, same cats, different order
    (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
    # Same, unsorted
    (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
    # Unsorted, different order
    (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
    # NaNs ('c' is not among the original categories)
    (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
    (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
    (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
    # Was an exact duplicate of the previous row; now covers the
    # unsorted values with reordered categories.
    (['b', 'a', 'c'], ['a', 'b'], ['b', 'a']),
    # Introduce NaNs (a used category is dropped)
    (['a', 'b', 'c'], ['a', 'b'], ['a']),
    (['a', 'b', 'c'], ['a', 'b'], ['b']),
    (['b', 'a', 'c'], ['a', 'b'], ['a']),
    # Was an exact duplicate of the previous row; now covers the
    # unsorted values when only 'b' is kept.
    (['b', 'a', 'c'], ['a', 'b'], ['b']),
    # No overlap
    (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
])
@pytest.mark.parametrize('ordered', [True, False])
def test_set_categories_many(self, values, categories, new_categories,
                             ordered):
    """``set_categories`` must match constructing with the new categories.

    A Categorical is built from ``values`` with ``categories`` and then
    switched to ``new_categories`` via ``set_categories``. The result is
    compared with a Categorical built directly from ``values`` with
    ``new_categories`` and the same ``ordered`` flag.
    """
    c = Categorical(values, categories)
    expected = Categorical(values, new_categories, ordered=ordered)
    result = c.set_categories(new_categories, ordered=ordered)
    tm.assert_categorical_equal(result, expected)
| 1127 | + |
1066 | 1128 | def test_reorder_categories(self):
|
1067 | 1129 | cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
1068 | 1130 | old = cat.copy()
|
|
0 commit comments