def _join_non_unique(self, other, how='left', return_indexers=False):
    """
    Join this index with ``other`` when the values are not unique.

    Must be overridden because np.putmask() does not work on Categorical.
    The joined values are assembled with ``union_categoricals`` instead,
    using a pair of complementary boolean masks to place the values taken
    from each side.

    Parameters
    ----------
    other : Index
        The index to join with. Its ``_values`` are handed to
        ``union_categoricals``, which is documented as taking
        Categoricals (presumably ``other`` is categorical too; confirm
        against callers).
    how : str, default 'left'
        Join type, forwarded unchanged to ``_get_join_indexers``.
    return_indexers : bool, default False
        When True, the left and right indexers are returned as well.

    Returns
    -------
    join_index
        The value produced by ``self._wrap_joined_index``.
    (join_index, left_indexer, right_indexer)
        Returned instead when ``return_indexers`` is True.
    """
    # Function-level imports: the merge helper was already imported this
    # way, and union_categoricals is pulled in here so the method carries
    # its own dependencies.
    from pandas.tools.merge import _get_join_indexers
    from pandas.types.concat import union_categoricals

    lidx, ridx = _get_join_indexers([self.values], [other._values],
                                    how=how, sort=True)
    lidx = com._ensure_platform_int(lidx)
    ridx = com._ensure_platform_int(ridx)

    # Slots whose left indexer is not -1 are filled from ``self``; every
    # remaining slot is filled from ``other``.
    from_left = lidx != -1
    from_right = ~from_left

    left_part = self.values[lidx[from_left]]
    right_part = other._values[ridx[from_right]]
    joined = union_categoricals([left_part, right_part],
                                masks=[from_left, from_right])
    joined = self._wrap_joined_index(joined, other)

    if not return_indexers:
        return joined
    return joined, lidx, ridx
is honoured diff --git a/pandas/types/concat.py b/pandas/types/concat.py index 53db9ddf79a5c..322c6820e4c0a 100644 --- a/pandas/types/concat.py +++ b/pandas/types/concat.py @@ -201,7 +201,7 @@ def convert_categorical(x): return Categorical(concatted, rawcats) -def union_categoricals(to_union): +def union_categoricals(to_union, masks=None): """ Combine list-like of Categoricals, unioning categories. All must have the same dtype, and none can be ordered. @@ -211,6 +211,10 @@ def union_categoricals(to_union): Parameters ---------- to_union : list-like of Categoricals + masks: list-like of boolean arrays, all of same shape + They indicate where to position the values: their shape will be the + shape of the returned array. If None, members of "to_union" will be + just concatenated. Returns ------- @@ -243,11 +247,17 @@ def union_categoricals(to_union): unique_cats = cats.append([c.categories for c in to_union[1:]]).unique() categories = Index(unique_cats) - new_codes = [] - for c in to_union: - indexer = categories.get_indexer(c.categories) - new_codes.append(indexer.take(c.codes)) - codes = np.concatenate(new_codes) + if masks is None: + new_codes = [] + for c in to_union: + indexer = categories.get_indexer(c.categories) + new_codes.append(indexer.take(c.codes)) + codes = np.concatenate(new_codes) + else: + codes = np.empty(shape=masks[0].shape, dtype=first.codes.dtype) + for c, mask in zip(to_union, masks): + indexer = categories.get_indexer(c.categories) + codes[mask] = indexer.take(c.codes) return Categorical(codes, categories=categories, ordered=False, fastpath=True)