Skip to content

BUG: alignment of CategoricalIndex #13440

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions pandas/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
deprecate_kwarg)
from pandas.core.config import get_option
from pandas.indexes.base import Index, _index_shared_docs
from pandas.types.concat import union_categoricals
import pandas.core.base as base
import pandas.core.common as com
import pandas.core.missing as missing
Expand Down Expand Up @@ -575,6 +576,33 @@ def append(self, other):
codes = np.concatenate([c.codes for c in to_concat])
return self._create_from_codes(codes, name=name)

def _join_non_unique(self, other, how='left', return_indexers=False):
    """
    Join this index with ``other`` when the values may be non-unique.

    Must be overridden because np.putmask() does not work on Categorical;
    the joined values are assembled with ``union_categoricals`` instead.

    Parameters
    ----------
    other : Index
        The index to join with; its ``_values`` are used as the right side.
    how : str, default 'left'
        Join type, forwarded unchanged to ``_get_join_indexers``.
    return_indexers : bool, default False
        If True, also return the left and right indexers.

    Returns
    -------
    join_index, or the tuple (join_index, left_idx, right_idx) when
    ``return_indexers`` is True.
    """
    from pandas.tools.merge import _get_join_indexers

    lidx, ridx = _get_join_indexers([self.values], [other._values],
                                    how=how, sort=True)
    lidx = com._ensure_platform_int(lidx)
    ridx = com._ensure_platform_int(ridx)

    # Positions whose left indexer is not -1 are filled from self; every
    # remaining position is filled from ``other``.
    from_left = lidx != -1
    from_right = ~from_left

    left_part = self.values[lidx[from_left]]
    right_part = other._values[ridx[from_right]]

    # The masks tell union_categoricals where each part lands in the result.
    joined = union_categoricals([left_part, right_part],
                                masks=[from_left, from_right])
    result = self._wrap_joined_index(joined, other)

    if not return_indexers:
        return result
    return result, lidx, ridx

@classmethod
def _add_comparison_methods(cls):
""" add in comparison methods """
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,8 @@ def test_copy_name(self):

s1 = Series(2, index=first)
s2 = Series(3, index=second[:-1])
if not isinstance(index, CategoricalIndex): # See GH13365
s3 = s1 * s2
self.assertEqual(s3.index.name, 'mario')
s3 = s1 * s2
self.assertEqual(s3.index.name, 'mario')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you need a LOT more tests

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I stated this when opening the PR. I will add them, no problem.


def test_ensure_copied_data(self):
# Check the "copy" argument of each Index.__new__ is honoured
Expand Down
22 changes: 16 additions & 6 deletions pandas/types/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def convert_categorical(x):
return Categorical(concatted, rawcats)


def union_categoricals(to_union):
def union_categoricals(to_union, masks=None):
"""
Combine list-like of Categoricals, unioning categories. All
must have the same dtype, and none can be ordered.
Expand All @@ -211,6 +211,10 @@ def union_categoricals(to_union):
Parameters
----------
to_union : list-like of Categoricals
masks: list-like of boolean arrays, all of same shape
They indicate where to position the values: their shape will be the
shape of the returned array. If None, members of "to_union" will be
just concatenated.

Returns
-------
Expand Down Expand Up @@ -243,11 +247,17 @@ def union_categoricals(to_union):
unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
categories = Index(unique_cats)

new_codes = []
for c in to_union:
indexer = categories.get_indexer(c.categories)
new_codes.append(indexer.take(c.codes))
codes = np.concatenate(new_codes)
if masks is None:
new_codes = []
for c in to_union:
indexer = categories.get_indexer(c.categories)
new_codes.append(indexer.take(c.codes))
codes = np.concatenate(new_codes)
else:
codes = np.empty(shape=masks[0].shape, dtype=first.codes.dtype)
for c, mask in zip(to_union, masks):
indexer = categories.get_indexer(c.categories)
codes[mask] = indexer.take(c.codes)
return Categorical(codes, categories=categories, ordered=False,
fastpath=True)

Expand Down