
Commit 19a92df

Author: harisbal
Message: Rebase
1 parent: 856c92b

4 files changed, +328 -266 lines

pandas/core/indexes/base.py (+59 -52)
@@ -2483,6 +2483,7 @@ def _get_unique_index(self, dropna=False):
             includes list, tuple, array, Series, and must be the same size as
             the index and its dtype must exactly match the index's type.
 
+            .. versionadded:: 0.17.0
             .. versionadded:: 0.21.0 (list-like tolerance)
 
         Returns
@@ -2632,6 +2633,7 @@ def _get_level_values(self, level):
             the same size as the index and its dtype must exactly match the
             index's type.
 
+            .. versionadded:: 0.17.0
             .. versionadded:: 0.21.0 (list-like tolerance)
 
         Examples
@@ -3190,46 +3192,68 @@ def join(self, other, how='left', level=None, return_indexers=False,
 
     def _join_multi(self, other, how, return_indexers=True):
         from .multi import MultiIndex
-        self_is_mi = isinstance(self, MultiIndex)
-        other_is_mi = isinstance(other, MultiIndex)
+        from pandas.core.reshape.merge import _complete_multilevel_join
 
         # figure out join names
-        self_names = _not_none(*self.names)
-        other_names = _not_none(*other.names)
+        self_names = list(_not_none(*self.names))
+        other_names = list(_not_none(*other.names))
         overlap = list(set(self_names) & set(other_names))
 
-        # need at least 1 in common, but not more than 1
+        # need at least 1 in common
         if not len(overlap):
-            raise ValueError("cannot join with no level specified and no "
-                             "overlapping names")
-        if len(overlap) > 1:
-            raise NotImplementedError("merging with more than one level "
-                                      "overlap on a multi-index is not "
-                                      "implemented")
-        jl = overlap[0]
+            raise ValueError("cannot join with no overlapping index names")
+
+        self_is_mi = isinstance(self, MultiIndex)
+        other_is_mi = isinstance(other, MultiIndex)
+
+        # Drop the non-matching levels
+        ldrop_levels = list(set(self_names) - set(overlap))
+        rdrop_levels = list(set(other_names) - set(overlap))
+
+        if self_is_mi and other_is_mi:
+            self_jnlevels = self.droplevel(ldrop_levels)
+            other_jnlevels = other.droplevel(rdrop_levels)
+
+            if not (self_jnlevels.is_unique and other_jnlevels.is_unique):
+                raise ValueError("Join on level between two MultiIndex "
+                                 "objects is ambiguous")
+
+            dropped_levels = ldrop_levels + rdrop_levels
+
+            join_idx, lidx, ridx = self_jnlevels.join(other_jnlevels, how,
+                                                      return_indexers=True)
+
+            levels, labels, names = _complete_multilevel_join(self, other, how,
+                                                              dropped_levels,
+                                                              join_idx,
+                                                              lidx, ridx)
+
+            multi_join_idx = MultiIndex(levels=levels, labels=labels,
+                                        names=names, verify_integrity=False)
+
+            # Check for unused levels
+            multi_join_idx = multi_join_idx.remove_unused_levels()
+
+            return multi_join_idx, lidx, ridx
+
+        jl = list(overlap)[0]
 
         # make the indices into mi's that match
-        if not (self_is_mi and other_is_mi):
-
-            flip_order = False
-            if self_is_mi:
-                self, other = other, self
-                flip_order = True
-                # flip if join method is right or left
-                how = {'right': 'left', 'left': 'right'}.get(how, how)
-
-            level = other.names.index(jl)
-            result = self._join_level(other, level, how=how,
-                                      return_indexers=return_indexers)
-
-            if flip_order:
-                if isinstance(result, tuple):
-                    return result[0], result[2], result[1]
-            return result
+        flip_order = False
+        if self_is_mi:
+            self, other = other, self
+            flip_order = True
+            # flip if join method is right or left
+            how = {'right': 'left', 'left': 'right'}.get(how, how)
 
-        # 2 multi-indexes
-        raise NotImplementedError("merging with both multi-indexes is not "
-                                  "implemented")
+        level = other.names.index(jl)
+        result = self._join_level(other, level, how=how,
+                                  return_indexers=return_indexers)
+
+        if flip_order:
+            if isinstance(result, tuple):
+                return result[0], result[2], result[1]
+        return result
 
     def _join_non_unique(self, other, how='left', return_indexers=False):
         from pandas.core.reshape.merge import _get_join_indexers
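For orientation, a minimal sketch of the case that the new MultiIndex-to-MultiIndex branch above targets: two MultiIndexes whose names overlap in exactly one level. The level names and values below are hypothetical and chosen only for illustration, and the sketch assumes a pandas build containing this change. Note that, as written, the new branch returns the joined index together with lidx and ridx, so return_indexers=True is used to keep the unpacking explicit.

    import pandas as pd

    # Two MultiIndexes that share only the 'key' level; each carries one
    # extra, non-overlapping level.
    left = pd.MultiIndex.from_arrays([['a', 'b', 'c'], [1, 2, 3]],
                                     names=['key', 'lvl_left'])
    right = pd.MultiIndex.from_arrays([['a', 'b'], ['x', 'y']],
                                      names=['key', 'lvl_right'])

    # The join is driven by the shared 'key' level; the dropped levels
    # ('lvl_left', 'lvl_right') are restored via _complete_multilevel_join.
    joined, lidx, ridx = left.join(right, how='inner', return_indexers=True)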
@@ -3438,8 +3462,8 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
 
     def slice_indexer(self, start=None, end=None, step=None, kind=None):
         """
-        For an ordered or unique index, compute the slice indexer for input
-        labels and step.
+        For an ordered Index, compute the slice indexer for input labels and
+        step
 
         Parameters
         ----------
@@ -3452,28 +3476,11 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
 
         Returns
         -------
-        indexer : slice
-
-        Raises
-        ------
-        KeyError : If key does not exist, or key is not unique and index is
-            not ordered.
+        indexer : ndarray or slice
 
         Notes
         -----
         This function assumes that the data is sorted, so use at your own peril
-
-        Examples
-        ---------
-        This is a method on all index types. For example you can do:
-
-        >>> idx = pd.Index(list('abcd'))
-        >>> idx.slice_indexer(start='b', end='c')
-        slice(1, 3)
-
-        >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
-        >>> idx.slice_indexer(start='b', end=('c', 'g'))
-        slice(1, 3)
         """
         start_slice, end_slice = self.slice_locs(start, end, step=step,
                                                  kind=kind)
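For reference, the behaviour shown in the doctest examples removed above is not changed by this hunk; a minimal sketch of the same call:

    import pandas as pd

    idx = pd.Index(list('abcd'))
    # Per the removed docstring example, this evaluates to slice(1, 3),
    # i.e. positions 1 and 2 of the index.
    indexer = idx.slice_indexer(start='b', end='c')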

pandas/core/reshape/merge.py (+84 -12)
@@ -127,7 +127,7 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces,
            try:
                if k in merged:
                    merged[k] = key
-            except KeyError:
+            except:
                pass
 
        pieces.append(merged)
@@ -1140,6 +1140,82 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner',
     return join_func(lkey, rkey, count, **kwargs)
 
 
+def _complete_multilevel_join(left, right, how, dropped_levels,
+                              join_idx, lidx, ridx):
+    """
+    *this is an internal non-public method*
+
+    Returns the levels, labels and names of a multilevel to multilevel join.
+    Depending on the type of join, this method restores the appropriate
+    dropped levels of the joined multi-index. The method relies on lidx, ridx
+    which hold the index positions of left and right where a join was feasible.
+
+    Parameters
+    ----------
+    left : Index
+        left index
+    right : Index
+        right index
+    join_idx : Index
+        the index of the join between the common levels of left and right
+    how : {'left', 'right', 'outer', 'inner'}
+    lidx : intp array
+        left indexer
+    ridx : intp array
+        right indexer
+    dropped_levels : str array
+        list of non-common levels
+
+    Returns
+    -------
+    levels : list of arrays
+        levels of combined multiindexes
+    labels : intp array
+        labels of combined multiindexes
+    names : str array
+        names of combined multiindexes
+
+    """
+
+    join_levels = join_idx.levels
+    join_labels = join_idx.labels
+    join_names = join_idx.names
+
+    # lidx and ridx hold the indexes where the join occurred
+    # for left and right respectively. If lidx (ridx) is None it means that
+    # the join occurred on all indices of left (right)
+    if lidx is None:
+        lidx = range(0, len(left))
+
+    if ridx is None:
+        ridx = range(0, len(right))
+
+    # Iterate through the levels that must be restored
+    for dl in dropped_levels:
+        if dl in left.names:
+            idx = left
+            indexer = lidx
+        else:
+            idx = right
+            indexer = ridx
+
+        # The index of the level name to be restored
+        name_idx = idx.names.index(dl)
+
+        restore_levels = idx.levels[name_idx].values
+        restore_labels = idx.labels[name_idx]
+
+        join_levels = join_levels + [restore_levels]
+        join_names = join_names + [dl]
+
+        # Inject -1 in the labels where a join was not possible,
+        # i.e. where indexer[i] == -1
+        labels = [restore_labels[i] if i != -1 else -1 for i in indexer]
+        join_labels = join_labels + [labels]
+
+    return join_levels, join_labels, join_names
+
+
 class _OrderedMerge(_MergeOperation):
     _merge_type = 'ordered_merge'
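To make the label-restoration step concrete, here is an illustrative sketch of the "-1 injection" described in the docstring above, using plain Python lists in place of pandas label arrays (all values are hypothetical):

    # Labels of the level being restored, and the join indexer for that side
    # (-1 marks rows of the joined index with no match on this side).
    restore_labels = [0, 1, 2, 3]
    indexer = [2, -1, 0, -1, 3]

    # Copy the original label where a match exists and -1 otherwise; this is
    # the same list comprehension used in _complete_multilevel_join.
    labels = [restore_labels[i] if i != -1 else -1 for i in indexer]
    assert labels == [2, -1, 0, -1, 3]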

@@ -1327,12 +1403,10 @@ def _get_merge_keys(self):
         join_names) = super(_AsOfMerge, self)._get_merge_keys()
 
        # validate index types are the same
-        for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)):
+        for lk, rk in zip(left_join_keys, right_join_keys):
            if not is_dtype_equal(lk.dtype, rk.dtype):
-                raise MergeError("incompatible merge keys [{i}] {lkdtype} and "
-                                 "{rkdtype}, must be the same type"
-                                 .format(i=i, lkdtype=lk.dtype,
-                                         rkdtype=rk.dtype))
+                raise MergeError("incompatible merge keys, "
+                                 "must be the same type")
 
        # validate tolerance; must be a Timedelta if we have a DTI
        if self.tolerance is not None:
@@ -1342,10 +1416,8 @@ def _get_merge_keys(self):
            else:
                lt = left_join_keys[-1]
 
-            msg = ("incompatible tolerance {tolerance}, must be compat "
-                   "with type {lkdtype}".format(
-                       tolerance=type(self.tolerance),
-                       lkdtype=lt.dtype))
+            msg = "incompatible tolerance, must be compat " \
+                  "with type {lt}".format(lt=type(lt))
 
            if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt):
                if not isinstance(self.tolerance, Timedelta):
@@ -1581,12 +1653,12 @@ def _sort_labels(uniques, left, right):
    # tuplesafe
    uniques = Index(uniques).values
 
-    llength = len(left)
+    l = len(left)
    labels = np.concatenate([left, right])
 
    _, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1)
    new_labels = _ensure_int64(new_labels)
-    new_left, new_right = new_labels[:llength], new_labels[llength:]
+    new_left, new_right = new_labels[:l], new_labels[l:]
 
    return new_left, new_right
 
