Skip to content

Commit de84842

Browse files
committed
ENH: allow per_axis, per_level multiindex setting
TST: better error messages when levels are not sorted with core/index/get_locs. ENH: add boolean indexer support on per_axis/per_level. BUG: handle a multi-level indexed series passed with a nested tuple of selectors, e.g. something like: s.loc['A1':'A3',:,['C1','C3']]
1 parent 7320263 commit de84842

File tree

3 files changed

+224
-29
lines changed

3 files changed

+224
-29
lines changed

pandas/core/index.py

+50-24
Original file line numberDiff line numberDiff line change
@@ -3231,34 +3231,52 @@ def partial_selection(key):
32313231
if key[i] != slice(None, None)]
32323232
return indexer, _maybe_drop_levels(indexer, ilevels,
32333233
drop_level)
3234-
elif isinstance(key, slice):
3235-
# handle a passed slice for this level
3236-
start = self._get_level_indexer(key.start,level=level)
3237-
stop = self._get_level_indexer(key.stop,level=level)
3238-
step = key.step
3239-
indexer = slice(start.start,stop.start,step)
3240-
return indexer, _maybe_drop_levels(indexer, [level], drop_level)
32413234
else:
32423235
indexer = self._get_level_indexer(key, level=level)
3243-
new_index = _maybe_drop_levels(indexer, [level], drop_level)
3244-
return indexer, new_index
3236+
return indexer, _maybe_drop_levels(indexer, [level], drop_level)
32453237

32463238
def _get_level_indexer(self, key, level=0):
3239+
# return a boolean indexer or a slice showing where the key is
3240+
# in the totality of values
3241+
32473242
level_index = self.levels[level]
3248-
loc = level_index.get_loc(key)
32493243
labels = self.labels[level]
32503244

3251-
if level > 0 or self.lexsort_depth == 0:
3252-
return np.array(labels == loc,dtype=bool)
3245+
if isinstance(key, slice):
3246+
# handle a slice, returning a slice if we can
3247+
# otherwise a boolean indexer
3248+
3249+
start = level_index.get_loc(key.start)
3250+
stop = level_index.get_loc(key.stop)
3251+
step = key.step
3252+
3253+
if level > 0 or self.lexsort_depth == 0:
3254+
# need semantics like the right-side search used
3255+
# when we return a slice (the sorted case below),
3256+
# so use stop+1 to make the stop label inclusive
3257+
m = np.zeros(len(labels),dtype=bool)
3258+
m[np.in1d(labels,np.arange(start,stop+1,step))] = True
3259+
return m
3260+
else:
3261+
# sorted, so can return slice object -> view
3262+
i = labels.searchsorted(start, side='left')
3263+
j = labels.searchsorted(stop, side='right')
3264+
return slice(i, j, step)
3265+
32533266
else:
3254-
# sorted, so can return slice object -> view
3255-
i = labels.searchsorted(loc, side='left')
3256-
j = labels.searchsorted(loc, side='right')
3257-
return slice(i, j)
3267+
3268+
loc = level_index.get_loc(key)
3269+
if level > 0 or self.lexsort_depth == 0:
3270+
return np.array(labels == loc,dtype=bool)
3271+
else:
3272+
# sorted, so can return slice object -> view
3273+
i = labels.searchsorted(loc, side='left')
3274+
j = labels.searchsorted(loc, side='right')
3275+
return slice(i, j)
32583276

32593277
def get_locs(self, tup):
32603278
"""
3261-
Given a tuple of slices/lists/labels to a level-wise spec
3279+
Given a tuple of slices/lists/labels/boolean indexer to a level-wise spec
32623280
produce an indexer to extract those locations
32633281
32643282
Parameters
@@ -3272,8 +3290,11 @@ def get_locs(self, tup):
32723290
"""
32733291

32743292
# must be lexsorted to at least as many levels
3275-
assert self.is_lexsorted_for_tuple(tup)
3276-
assert self.is_unique
3293+
if not self.is_lexsorted_for_tuple(tup):
3294+
raise KeyError('MultiIndex Slicing requires the index to be fully lexsorted'
3295+
' tuple len ({0}), lexsort depth ({1})'.format(len(tup), self.lexsort_depth))
3296+
if not self.is_unique:
3297+
raise ValueError('MultiIndex Slicing requires a unique index')
32773298

32783299
def _convert_indexer(r):
32793300
if isinstance(r, slice):
@@ -3285,7 +3306,14 @@ def _convert_indexer(r):
32853306
ranges = []
32863307
for i,k in enumerate(tup):
32873308

3288-
if com.is_list_like(k):
3309+
if com._is_bool_indexer(k):
3310+
# a boolean indexer, must be the same length!
3311+
k = np.asarray(k)
3312+
if len(k) != len(self):
3313+
raise ValueError("cannot index with a boolean indexer that is"
3314+
" not the same length as the index")
3315+
ranges.append(k)
3316+
elif com.is_list_like(k):
32893317
# a collection of labels to include from this level (these are or'd)
32903318
ranges.append(reduce(
32913319
np.logical_or,[ _convert_indexer(self._get_level_indexer(x, level=i)
@@ -3294,10 +3322,8 @@ def _convert_indexer(r):
32943322
# include all from this level
32953323
pass
32963324
elif isinstance(k,slice):
3297-
start = self._get_level_indexer(k.start,level=i)
3298-
stop = self._get_level_indexer(k.stop,level=i)
3299-
step = k.step
3300-
ranges.append(slice(start.start,stop.start,step))
3325+
# a slice, include BOTH of the labels
3326+
ranges.append(self._get_level_indexer(k,level=i))
33013327
else:
33023328
# a single label
33033329
ranges.append(self.get_loc_level(k,level=i,drop_level=False)[0])

pandas/core/indexing.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def _slice(self, obj, axis=0, raise_on_error=False, typ=None):
9999
typ=typ)
100100

101101
def __setitem__(self, key, value):
102+
102103
# kludgetastic
103104
ax = self.obj._get_axis(0)
104105
if isinstance(ax, MultiIndex):
@@ -131,6 +132,11 @@ def _has_valid_tuple(self, key):
131132
raise ValueError("Location based indexing can only have [%s] "
132133
"types" % self._valid_types)
133134

135+
def _is_nested_tuple_indexer(self, tup):
136+
if any([ isinstance(ax, MultiIndex) for ax in self.obj.axes ]):
137+
return any([ _is_nested_tuple(tup,ax) for ax in self.obj.axes ])
138+
return False
139+
134140
def _convert_tuple(self, key, is_setter=False):
135141
keyidx = []
136142
for i, k in enumerate(key):
@@ -716,9 +722,8 @@ def _handle_lowerdim_multi_index_axis0(self, tup):
716722
def _getitem_lowerdim(self, tup):
717723

718724
# we may have a nested tuple indexer here
719-
if any([ isinstance(ax, MultiIndex) for ax in self.obj.axes ]):
720-
if any([ _is_nested_tuple(tup,ax) for ax in self.obj.axes ]):
721-
return self._getitem_nested_tuple(tup)
725+
if self._is_nested_tuple_indexer(tup):
726+
return self._getitem_nested_tuple(tup)
722727

723728
# we may be using a tuple to represent multiple dimensions here
724729
ax0 = self.obj._get_axis(0)
@@ -772,7 +777,12 @@ def _getitem_nested_tuple(self, tup):
772777
# multi-index dimension, try to see if we have something like
773778
# a tuple passed to a series with a multi-index
774779
if len(tup) > self.ndim:
775-
return self._handle_lowerdim_multi_index_axis0(tup)
780+
result = self._handle_lowerdim_multi_index_axis0(tup)
781+
if result is not None:
782+
return result
783+
784+
# this is a series with a multi-index specified with a tuple of selectors
785+
return self._getitem_axis(tup, axis=0, validate_iterable=True)
776786

777787
# handle the multi-axis by taking sections and reducing
778788
# this is iterative
@@ -983,6 +993,8 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
983993
if isinstance(obj, slice):
984994
return self._convert_slice_indexer(obj, axis)
985995

996+
elif _is_nested_tuple(obj, labels):
997+
return labels.get_locs(obj)
986998
elif _is_list_like(obj):
987999
if com._is_bool_indexer(obj):
9881000
obj = _check_bool_indexer(labels, obj)

pandas/tests/test_indexing.py

+158-1
Original file line numberDiff line numberDiff line change
@@ -1072,7 +1072,13 @@ def mklbl(prefix,n):
10721072
ix = MultiIndex.from_product([mklbl('A',5),mklbl('B',7),mklbl('C',4),mklbl('D',2)])
10731073
df = DataFrame(np.arange(len(ix.get_values())),index=ix)
10741074
result = df.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:]
1075-
expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (a == 'A1' or a == 'A2') and (c == 'C1' or c == 'C3')]]
1075+
expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (
1076+
a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]]
1077+
assert_frame_equal(result, expected)
1078+
1079+
expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (
1080+
a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C2' or c == 'C3')]]
1081+
result = df.loc[(slice('A1','A3'),slice(None), slice('C1','C3')),:]
10761082
assert_frame_equal(result, expected)
10771083

10781084
# test multi-index slicing with per axis and per index controls
@@ -1121,13 +1127,164 @@ def mklbl(prefix,n):
11211127
expected = df.iloc[[0,1,3]]
11221128
assert_frame_equal(result, expected)
11231129

1130+
# multi-level series
1131+
s = Series(np.arange(len(ix.get_values())),index=ix)
1132+
result = s.loc['A1':'A3', :, ['C1','C3']]
1133+
expected = s.loc[[ tuple([a,b,c,d]) for a,b,c,d in s.index.values if (
1134+
a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]]
1135+
assert_series_equal(result, expected)
1136+
1137+
# boolean indexers
1138+
result = df.loc[(slice(None),df.loc[:,('a','bar')]>5),:]
1139+
expected = df.iloc[[2,3]]
1140+
assert_frame_equal(result, expected)
1141+
1142+
def f():
1143+
df.loc[(slice(None),np.array([True,False])),:]
1144+
self.assertRaises(ValueError, f)
1145+
11241146
# ambiguous cases
11251147
# these can be multiply interpreted
11261148
# but we can catch this in some cases
11271149
def f():
11281150
df.loc[(slice(None),[1])]
11291151
self.assertRaises(KeyError, f)
11301152

1153+
def test_per_axis_per_level_getitem_doc_examples(self):
1154+
1155+
# from indexing.rst / advanced
1156+
def mklbl(prefix,n):
1157+
return ["%s%s" % (prefix,i) for i in range(n)]
1158+
1159+
index = MultiIndex.from_product([mklbl('A',4),
1160+
mklbl('B',2),
1161+
mklbl('C',4),
1162+
mklbl('D',2)])
1163+
columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),
1164+
('b','foo'),('b','bah')],
1165+
names=['lvl0', 'lvl1'])
1166+
df = DataFrame(np.arange(len(index)*len(columns)).reshape((len(index),len(columns))),
1167+
index=index,
1168+
columns=columns)
1169+
result = df.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:]
1170+
expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (
1171+
a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]]
1172+
assert_frame_equal(result, expected)
1173+
1174+
result = df.loc[(slice(None),slice(None), ['C1','C3']),:]
1175+
expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (
1176+
c == 'C1' or c == 'C3')]]
1177+
assert_frame_equal(result, expected)
1178+
1179+
# not sorted
1180+
def f():
1181+
df.loc['A1',(slice(None),'foo')]
1182+
self.assertRaises(KeyError, f)
1183+
df = df.sortlevel(axis=1)
1184+
1185+
df.loc['A1',(slice(None),'foo')]
1186+
df.loc[(slice(None),slice(None), ['C1','C3']),(slice(None),'foo')]
1187+
1188+
def test_per_axis_per_level_setitem(self):
1189+
1190+
# test multi-index slicing with per axis and per index controls
1191+
index = MultiIndex.from_tuples([('A',1),('A',2),('A',3),('B',1)],
1192+
names=['one','two'])
1193+
columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),('b','foo'),('b','bah')],
1194+
names=['lvl0', 'lvl1'])
1195+
1196+
df_orig = DataFrame(np.arange(16).reshape(4, 4), index=index, columns=columns)
1197+
df_orig = df_orig.sortlevel(axis=0).sortlevel(axis=1)
1198+
1199+
# identity
1200+
df = df_orig.copy()
1201+
df.loc[(slice(None),slice(None)),:] = 100
1202+
expected = df_orig.copy()
1203+
expected.iloc[:,:] = 100
1204+
assert_frame_equal(df, expected)
1205+
1206+
df = df_orig.copy()
1207+
df.loc[(slice(None),slice(None)),(slice(None),slice(None))] = 100
1208+
expected = df_orig.copy()
1209+
expected.iloc[:,:] = 100
1210+
assert_frame_equal(df, expected)
1211+
1212+
df = df_orig.copy()
1213+
df.loc[:,(slice(None),slice(None))] = 100
1214+
expected = df_orig.copy()
1215+
expected.iloc[:,:] = 100
1216+
assert_frame_equal(df, expected)
1217+
1218+
# index
1219+
df = df_orig.copy()
1220+
df.loc[(slice(None),[1]),:] = 100
1221+
expected = df_orig.copy()
1222+
expected.iloc[[0,3]] = 100
1223+
assert_frame_equal(df, expected)
1224+
1225+
df = df_orig.copy()
1226+
df.loc[(slice(None),1),:] = 100
1227+
expected = df_orig.copy()
1228+
expected.iloc[[0,3]] = 100
1229+
assert_frame_equal(df, expected)
1230+
1231+
# columns
1232+
df = df_orig.copy()
1233+
df.loc[:,(slice(None),['foo'])] = 100
1234+
expected = df_orig.copy()
1235+
expected.iloc[:,[1,3]] = 100
1236+
assert_frame_equal(df, expected)
1237+
1238+
# both
1239+
df = df_orig.copy()
1240+
df.loc[(slice(None),1),(slice(None),['foo'])] = 100
1241+
expected = df_orig.copy()
1242+
expected.iloc[[0,3],[1,3]] = 100
1243+
assert_frame_equal(df, expected)
1244+
1245+
df = df_orig.copy()
1246+
df.loc['A','a'] = 100
1247+
expected = df_orig.copy()
1248+
expected.iloc[0:3,0:2] = 100
1249+
assert_frame_equal(df, expected)
1250+
1251+
# setting with a list-like
1252+
df = df_orig.copy()
1253+
df.loc[(slice(None),1),(slice(None),['foo'])] = np.array([[100, 100], [100, 100]],dtype='int64')
1254+
expected = df_orig.copy()
1255+
expected.iloc[[0,3],[1,3]] = 100
1256+
assert_frame_equal(df, expected)
1257+
1258+
# not enough values
1259+
df = df_orig.copy()
1260+
def f():
1261+
df.loc[(slice(None),1),(slice(None),['foo'])] = np.array([[100], [100, 100]],dtype='int64')
1262+
self.assertRaises(ValueError, f)
1263+
def f():
1264+
df.loc[(slice(None),1),(slice(None),['foo'])] = np.array([100, 100, 100, 100],dtype='int64')
1265+
self.assertRaises(ValueError, f)
1266+
1267+
# with an alignable rhs
1268+
df = df_orig.copy()
1269+
df.loc[(slice(None),1),(slice(None),['foo'])] = df.loc[(slice(None),1),(slice(None),['foo'])] * 5
1270+
expected = df_orig.copy()
1271+
expected.iloc[[0,3],[1,3]] = expected.iloc[[0,3],[1,3]] * 5
1272+
assert_frame_equal(df, expected)
1273+
1274+
df = df_orig.copy()
1275+
df.loc[(slice(None),1),(slice(None),['foo'])] *= df.loc[(slice(None),1),(slice(None),['foo'])]
1276+
expected = df_orig.copy()
1277+
expected.iloc[[0,3],[1,3]] *= expected.iloc[[0,3],[1,3]]
1278+
assert_frame_equal(df, expected)
1279+
1280+
rhs = df_orig.loc[(slice(None),1),(slice(None),['foo'])].copy()
1281+
rhs.loc[:,('c','bah')] = 10
1282+
df = df_orig.copy()
1283+
df.loc[(slice(None),1),(slice(None),['foo'])] *= rhs
1284+
expected = df_orig.copy()
1285+
expected.iloc[[0,3],[1,3]] *= expected.iloc[[0,3],[1,3]]
1286+
assert_frame_equal(df, expected)
1287+
11311288
def test_getitem_multiindex(self):
11321289

11331290
# GH 5725

0 commit comments

Comments
 (0)