Skip to content

BUG: fix in categorical merges #32079

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Feb 27, 2020
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ Bug fixes
Categorical
^^^^^^^^^^^

-
- Bug where :func:`merge` was unable to join on non-unique categorical indices (:issue:`28189`)
-

Datetimelike
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/join.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ ctypedef fused join_t:
float64_t
float32_t
object
int8_t
int16_t
int32_t
int64_t
uint64_t
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
from pandas.core.indexes.extension import ExtensionIndex, inherit_names
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name

# Build this module's doc kwargs from a fresh copy of ``ibase._index_doc_kwargs``
# (the source mapping itself is not mutated), overriding ``target_klass``.
_index_doc_kwargs = dict(ibase._index_doc_kwargs, target_klass="CategoricalIndex")
Expand Down Expand Up @@ -781,6 +782,12 @@ def _delegate_method(self, name: str, *args, **kwargs):
return res
return CategoricalIndex(res, name=self.name)

def _wrap_joined_index(
    self, joined: np.ndarray, other: "CategoricalIndex"
) -> "CategoricalIndex":
    """
    Build the index returned from a join out of the joined codes.

    Parameters
    ----------
    joined : np.ndarray
        Passed straight to ``_create_from_codes`` as the codes of the result.
    other : CategoricalIndex
        The other index taking part in the join; only used here to work out
        the name of the result.

    Returns
    -------
    CategoricalIndex
        Whatever ``self._create_from_codes`` builds from ``joined``, named via
        ``get_op_result_name(self, other)``.
    """
    return self._create_from_codes(joined, name=get_op_result_name(self, other))


# Class-level setup calls executed once, when this module is imported.
# NOTE(review): judging by the method names, these disable numeric/arithmetic
# operations on CategoricalIndex — confirm against the Index base class.
CategoricalIndex._add_numeric_methods_add_sub_disabled()
CategoricalIndex._add_numeric_methods_disabled()
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2163,3 +2163,17 @@ def test_merge_datetime_upcast_dtype():
}
)
tm.assert_frame_equal(result, expected)


def test_categorical_non_unique_monotonic():
    # GH 28189: index-on-index merge where the left CategoricalIndex is
    # non-unique (the same label repeated four times).
    df = DataFrame(range(4), columns=["value"], index=CategoricalIndex(["1"] * 4))
    # Right frame: a single row stored under that same label.
    df2 = DataFrame([[6]], columns=["value"], index=CategoricalIndex(["1"]))

    result = merge(df, df2, how="left", left_index=True, right_index=True)

    # Every left row pairs with the lone right row; the clashing "value"
    # columns come back as "value_x"/"value_y" and the index stays categorical.
    expected = DataFrame(
        [[0, 6], [1, 6], [2, 6], [3, 6]],
        columns=["value_x", "value_y"],
        index=CategoricalIndex(["1"] * 4),
    )
    # (result, expected) argument order, consistent with the preceding test.
    tm.assert_frame_equal(result, expected)