Skip to content

Commit ea1d8fa

Browse files
authored
BUG: fix in categorical merges (#32079)
1 parent 4800ab4 commit ea1d8fa

File tree

4 files changed

+33
-1
lines changed

4 files changed

+33
-1
lines changed

doc/source/whatsnew/v1.1.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,10 @@ Bug fixes
126126

127127
Categorical
128128
^^^^^^^^^^^
129+
130+
- Bug where :func:`merge` was unable to join on non-unique categorical indices (:issue:`28189`)
129131
- Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`)
130132
-
131-
-
132133

133134
Datetimelike
134135
^^^^^^^^^^^^

pandas/_libs/join.pyx

+2
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ ctypedef fused join_t:
254254
float64_t
255255
float32_t
256256
object
257+
int8_t
258+
int16_t
257259
int32_t
258260
int64_t
259261
uint64_t

pandas/core/indexes/category.py

+7
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
3030
from pandas.core.indexes.extension import ExtensionIndex, inherit_names
3131
import pandas.core.missing as missing
32+
from pandas.core.ops import get_op_result_name
3233

3334
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
3435
_index_doc_kwargs.update(dict(target_klass="CategoricalIndex"))
@@ -763,6 +764,12 @@ def _delegate_method(self, name: str, *args, **kwargs):
763764
return res
764765
return CategoricalIndex(res, name=self.name)
765766

767+
def _wrap_joined_index(
    self, joined: np.ndarray, other: "CategoricalIndex"
) -> "CategoricalIndex":
    """
    Wrap the raw output of a join in a new CategoricalIndex.

    Parameters
    ----------
    joined : np.ndarray
        Array produced by the join. It is passed straight to
        ``_create_from_codes``, i.e. it is treated as category codes
        rather than as category values.
    other : CategoricalIndex
        The index that ``self`` was joined with; only consulted to
        resolve the name of the result.

    Returns
    -------
    CategoricalIndex
        Built by ``self._create_from_codes`` from ``joined``, carrying
        the name resolved from ``self`` and ``other``.
    """
    # Resolve the result name from both operands rather than blindly
    # reusing ``self.name``.
    name = get_op_result_name(self, other)
    return self._create_from_codes(joined, name=name)
772+
766773

767774
CategoricalIndex._add_numeric_methods_add_sub_disabled()
768775
CategoricalIndex._add_numeric_methods_disabled()

pandas/tests/reshape/merge/test_merge.py

+22
Original file line numberDiff line numberDiff line change
@@ -2163,3 +2163,25 @@ def test_merge_datetime_upcast_dtype():
21632163
}
21642164
)
21652165
tm.assert_frame_equal(result, expected)
2166+
2167+
2168+
@pytest.mark.parametrize("n_categories", [5, 128])
def test_categorical_non_unique_monotonic(n_categories):
    # GH 28189
    # With n_categories as 5, we test the int8 case is hit in libjoin,
    # with n_categories as 128 we test the int16 case.

    # Label 0 appears twice, so the left index is non-unique while still
    # being monotonic: [0, 0, 1, 2, ..., n_categories - 1].
    left_index = CategoricalIndex([0] + list(range(n_categories)))
    df1 = DataFrame(range(n_categories + 1), columns=["value"], index=left_index)

    # The right frame has a single row labelled 0, declared over the same
    # range of categories as the left index.
    df2 = DataFrame(
        [[6]],
        columns=["value"],
        index=CategoricalIndex([0], categories=np.arange(n_categories)),
    )

    result = merge(df1, df2, how="left", left_index=True, right_index=True)

    # Only the first two left rows (both labelled 0) find a match on the
    # right; every other row keeps its left value and gets NaN on the right.
    expected = DataFrame(
        [[i, 6.0] if i < 2 else [i, np.nan] for i in range(n_categories + 1)],
        columns=["value_x", "value_y"],
        index=left_index,
    )
    # (result, expected) order, matching the other tests in this module.
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)