Skip to content

Commit 7f4a72a

Browse files
committed
BUG: alignment of CategoricalIndex
Closes pandas-dev#13365
1 parent ab116a7 commit 7f4a72a

File tree

3 files changed

+46
-9
lines changed

3 files changed

+46
-9
lines changed

pandas/indexes/category.py

+28
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
deprecate_kwarg)
99
from pandas.core.config import get_option
1010
from pandas.indexes.base import Index, _index_shared_docs
11+
from pandas.types.concat import union_categoricals
1112
import pandas.core.base as base
1213
import pandas.core.common as com
1314
import pandas.core.missing as missing
@@ -575,6 +576,33 @@ def append(self, other):
575576
codes = np.concatenate([c.codes for c in to_concat])
576577
return self._create_from_codes(codes, name=name)
577578

579+
def _join_non_unique(self, other, how='left', return_indexers=False):
    """
    Join with ``other`` without relying on np.putmask(), which does not
    work on Categorical; this is why the method must be overridden.

    Parameters
    ----------
    other : object exposing ``_values``; those values are joined against
        ``self.values``
    how : join method, passed through to ``_get_join_indexers``
    return_indexers : boolean, default False
        whether to also return the left and right indexers

    Returns
    -------
    join_index, or the tuple (join_index, left_idx, right_idx) when
    ``return_indexers`` is True
    """
    # function-scope import, kept local to this method
    from pandas.tools.merge import _get_join_indexers

    lidx, ridx = _get_join_indexers([self.values], [other._values],
                                    how=how, sort=True)
    lidx = com._ensure_platform_int(lidx)
    ridx = com._ensure_platform_int(ridx)

    # positions where the left indexer is not -1 are filled from self,
    # every remaining position is filled from other
    from_left = lidx != -1
    from_right = ~from_left

    pieces = [self.values[lidx[from_left]],
              other._values[ridx[from_right]]]
    joined = union_categoricals(pieces, masks=[from_left, from_right])
    joined = self._wrap_joined_index(joined, other)

    if not return_indexers:
        return joined
    return joined, lidx, ridx
605+
578606
@classmethod
579607
def _add_comparison_methods(cls):
580608
""" add in comparison methods """

pandas/tests/indexes/common.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,8 @@ def test_copy_name(self):
225225

226226
s1 = Series(2, index=first)
227227
s2 = Series(3, index=second[:-1])
228-
if not isinstance(index, CategoricalIndex): # See GH13365
229-
s3 = s1 * s2
230-
self.assertEqual(s3.index.name, 'mario')
228+
s3 = s1 * s2
229+
self.assertEqual(s3.index.name, 'mario')
231230

232231
def test_ensure_copied_data(self):
233232
# Check the "copy" argument of each Index.__new__ is honoured

pandas/types/concat.py

+16-6
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ def convert_categorical(x):
201201
return Categorical(concatted, rawcats)
202202

203203

204-
def union_categoricals(to_union):
204+
def union_categoricals(to_union, masks=None):
205205
"""
206206
Combine list-like of Categoricals, unioning categories. All
207207
must have the same dtype, and none can be ordered.
@@ -211,6 +211,10 @@ def union_categoricals(to_union):
211211
Parameters
212212
----------
213213
to_union : list-like of Categoricals
214+
masks : list-like of boolean arrays, all of same shape, optional
215+
They indicate where to position the values: their shape will be the
216+
shape of the returned array. If None, members of "to_union" will be
217+
just concatenated.
214218
215219
Returns
216220
-------
@@ -243,11 +247,17 @@ def union_categoricals(to_union):
243247
unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
244248
categories = Index(unique_cats)
245249

246-
new_codes = []
247-
for c in to_union:
248-
indexer = categories.get_indexer(c.categories)
249-
new_codes.append(indexer.take(c.codes))
250-
codes = np.concatenate(new_codes)
250+
if masks is None:
251+
new_codes = []
252+
for c in to_union:
253+
indexer = categories.get_indexer(c.categories)
254+
new_codes.append(indexer.take(c.codes))
255+
codes = np.concatenate(new_codes)
256+
else:
257+
codes = np.empty(shape=masks[0].shape, dtype=first.codes.dtype)
258+
for c, mask in zip(to_union, masks):
259+
indexer = categories.get_indexer(c.categories)
260+
codes[mask] = indexer.take(c.codes)
251261
return Categorical(codes, categories=categories, ordered=False,
252262
fastpath=True)
253263

0 commit comments

Comments
 (0)