Skip to content

Commit d53a981

Browse files
committed
Enable is_dtype_equal on CategoricalIndex, fixed some doc typos, added ordered CategoricalIndex test
1 parent 95348c1 commit d53a981

File tree

5 files changed: +36 additions, -19 deletions (lines changed)

doc/source/whatsnew/v0.20.3.txt

+1
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ Indexing
5151
^^^^^^^^
5252

5353
- Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`)
54+
- Fixed a bug that prevented joining on a categorical MultiIndex (:issue:`16627`).
5455

5556
I/O
5657
^^^

doc/source/whatsnew/v0.21.0.txt

-1
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,6 @@ Indexing
100100

101101
- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
102102
- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`).
103-
- Fixed a bug that prevented joining on a categorical MultiIndex (:issue:`13873`).
104103

105104

106105
I/O

pandas/core/indexes/category.py

+3
Original file line number | Diff line number | Diff line change
@@ -560,6 +560,9 @@ def take(self, indices, axis=0, allow_fill=True,
560560
na_value=-1)
561561
return self._create_from_codes(taken)
562562

563+
def is_dtype_equal(self, other):
564+
return self._data.is_dtype_equal(other)
565+
563566
take_nd = take
564567

565568
def map(self, mapper):

pandas/core/reshape/merge.py

+2 -6
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
import pandas.compat as compat
1212

1313
from pandas import (Categorical, Series, DataFrame,
14-
Index, MultiIndex, Timedelta, CategoricalIndex)
14+
Index, MultiIndex, Timedelta)
1515
from pandas.core.frame import _merge_doc
1616
from pandas.core.dtypes.common import (
1717
is_datetime64tz_dtype,
@@ -1441,13 +1441,9 @@ def _factorize_keys(lk, rk, sort=True):
14411441
rk = rk.values
14421442

14431443
# if we exactly match in categories, allow us to use codes
1444-
if isinstance(lk, CategoricalIndex):
1445-
ldata = lk._data
1446-
else:
1447-
ldata = lk
14481444
if (is_categorical_dtype(lk) and
14491445
is_categorical_dtype(rk) and
1450-
ldata.is_dtype_equal(rk)):
1446+
lk.is_dtype_equal(rk)):
14511447
return lk.codes, rk.codes, len(lk.categories)
14521448

14531449
if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):

pandas/tests/test_join.py

+30 -12
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
# -*- coding: utf-8 -*-
22

33
import numpy as np
4-
from pandas import Index
4+
from pandas import Index, DataFrame, Categorical, merge
55

66
from pandas._libs import join as _join
77
import pandas.util.testing as tm
8-
from pandas.util.testing import assert_almost_equal
8+
from pandas.util.testing import assert_almost_equal, assert_frame_equal
99

1010

1111
class TestIndexer(object):
@@ -196,20 +196,38 @@ def test_inner_join_indexer2():
196196

197197
def test_merge_join_categorical_multiindex():
198198
# From issue 16627
199-
import pandas as pd
200-
a = {'Cat1': pd.Categorical(['a', 'b', 'a', 'c', 'a', 'b'],
201-
['a', 'b', 'c']),
199+
a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'],
200+
['a', 'b', 'c']),
202201
'Int1': [0, 1, 0, 1, 0, 0]}
203-
a = pd.DataFrame(a)
202+
a = DataFrame(a)
204203

205-
b = {'Cat': pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
206-
['a', 'b', 'c']),
204+
b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
205+
['a', 'b', 'c']),
207206
'Int': [0, 0, 0, 1, 1, 1],
208207
'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]}
209-
b = pd.DataFrame(b).set_index(['Cat', 'Int'])['Factor']
208+
b = DataFrame(b).set_index(['Cat', 'Int'])['Factor']
210209

211-
c = pd.merge(a, b.reset_index(), left_on=['Cat1', 'Int1'],
212-
right_on=['Cat', 'Int'], how='left')
210+
c = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'],
211+
right_on=['Cat', 'Int'], how='left')
213212
d = a.join(b, on=['Cat1', 'Int1'])
214213
c = c.drop(['Cat', 'Int'], axis=1)
215-
assert_almost_equal(c, d)
214+
assert_frame_equal(c, d)
215+
216+
a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'],
217+
['b', 'a', 'c'],
218+
ordered=True),
219+
'Int1': [0, 1, 0, 1, 0, 0]}
220+
a = DataFrame(a)
221+
222+
b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
223+
['b', 'a', 'c'],
224+
ordered=True),
225+
'Int': [0, 0, 0, 1, 1, 1],
226+
'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]}
227+
b = DataFrame(b).set_index(['Cat', 'Int'])['Factor']
228+
229+
c = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'],
230+
right_on=['Cat', 'Int'], how='left')
231+
d = a.join(b, on=['Cat1', 'Int1'])
232+
c = c.drop(['Cat', 'Int'], axis=1)
233+
assert_frame_equal(c, d)

0 commit comments

Comments
 (0)