Skip to content

Commit b15a376

Browse files
committed
Merge pull request pandas-dev#4761 from jreback/dt_multi
BUG: in multi-indexing with a partial string selection (GH4758)
2 parents d6633ad + ebee2f8 commit b15a376

File tree

7 files changed

+609
-501
lines changed

7 files changed

+609
-501
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
322322
- Bug in using ``iloc/loc`` with a cross-sectional and duplicate indices (:issue:`4726`)
323323
- Bug with using ``QUOTE_NONE`` with ``to_csv`` causing ``Exception``. (:issue:`4328`)
324324
- Bug with Series indexing not raising an error when the right-hand-side has an incorrect length (:issue:`2702`)
325+
- Bug in multi-indexing with a partial string selection as one part of a MultiIndex (:issue:`4758`)
325326

326327
pandas 0.12
327328
===========

pandas/core/index.py

+32-10
Original file line numberDiff line numberDiff line change
@@ -2596,10 +2596,15 @@ def _maybe_drop_levels(indexer, levels, drop_level):
25962596
if not drop_level:
25972597
return self[indexer]
25982598
# kludgearound
2599-
new_index = self[indexer]
2599+
orig_index = new_index = self[indexer]
26002600
levels = [self._get_level_number(i) for i in levels]
26012601
for i in sorted(levels, reverse=True):
2602-
new_index = new_index.droplevel(i)
2602+
try:
2603+
new_index = new_index.droplevel(i)
2604+
except:
2605+
2606+
# no dropping here
2607+
return orig_index
26032608
return new_index
26042609

26052610
if isinstance(level, (tuple, list)):
@@ -2635,20 +2640,37 @@ def _maybe_drop_levels(indexer, levels, drop_level):
26352640
pass
26362641

26372642
if not any(isinstance(k, slice) for k in key):
2638-
if len(key) == self.nlevels:
2639-
if self.is_unique:
2640-
return self._engine.get_loc(_values_from_object(key)), None
2641-
else:
2642-
indexer = slice(*self.slice_locs(key, key))
2643-
return indexer, self[indexer]
2644-
else:
2645-
# partial selection
2643+
2644+
# partial selection
2645+
def partial_selection(key):
26462646
indexer = slice(*self.slice_locs(key, key))
26472647
if indexer.start == indexer.stop:
26482648
raise KeyError(key)
26492649
ilevels = [i for i in range(len(key))
26502650
if key[i] != slice(None, None)]
26512651
return indexer, _maybe_drop_levels(indexer, ilevels, drop_level)
2652+
2653+
if len(key) == self.nlevels:
2654+
2655+
if self.is_unique:
2656+
2657+
# here we have a completely specified key, but are using some partial string matching here
2658+
# GH4758
2659+
can_index_exactly = any([ l.is_all_dates and not isinstance(k,compat.string_types) for k, l in zip(key, self.levels) ])
2660+
if any([ l.is_all_dates for k, l in zip(key, self.levels) ]) and not can_index_exactly:
2661+
indexer = slice(*self.slice_locs(key, key))
2662+
2663+
# we have a multiple selection here
2664+
if not indexer.stop-indexer.start == 1:
2665+
return partial_selection(key)
2666+
2667+
key = tuple(self[indexer].tolist()[0])
2668+
2669+
return self._engine.get_loc(_values_from_object(key)), None
2670+
else:
2671+
return partial_selection(key)
2672+
else:
2673+
return partial_selection(key)
26522674
else:
26532675
indexer = None
26542676
for i, k in enumerate(key):

pandas/core/indexing.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -952,9 +952,15 @@ def _has_valid_type(self, key, axis):
952952
if not len(ax):
953953
raise KeyError("The [%s] axis is empty" % self.obj._get_axis_name(axis))
954954

955-
if not key in ax:
955+
try:
956+
if not key in ax:
957+
raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis)))
958+
except (TypeError):
959+
960+
# if we have a weird type of key/ax
956961
raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis)))
957962

963+
958964
return True
959965

960966
def _getitem_axis(self, key, axis=0):

pandas/tests/test_multilevel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1842,9 +1842,9 @@ def test_duplicate_mi(self):
18421842
columns=list('ABCD'))
18431843
df = df.set_index(['A','B'])
18441844
df = df.sortlevel(0)
1845-
result = df.loc[('foo','bar')]
18461845
expected = DataFrame([['foo','bar',1.0,1],['foo','bar',2.0,2],['foo','bar',5.0,5]],
18471846
columns=list('ABCD')).set_index(['A','B'])
1847+
result = df.loc[('foo','bar')]
18481848
assert_frame_equal(result,expected)
18491849

18501850
def test_multiindex_set_index(self):

pandas/tools/tests/test_pivot.py

+1
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']):
175175
exp = self.data.groupby(rows)[col].mean()
176176
tm.assert_series_equal(cmarg, exp)
177177

178+
res.sortlevel(inplace=True)
178179
rmarg = res.xs(('All', ''))[:-1]
179180
exp = self.data.groupby(cols)[col].mean()
180181
tm.assert_series_equal(rmarg, exp)

0 commit comments

Comments
 (0)