Skip to content

Commit 041d2a3

Browse files
committed
ENH: multiple group selection from hierarchical index via reindex and .ix, close #134
1 parent fe065ce commit 041d2a3

File tree

7 files changed

+87
-18
lines changed

7 files changed

+87
-18
lines changed

pandas/core/frame.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,15 +1917,11 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
19171917
raise ValueError('Must specify axis=0 or 1')
19181918

19191919
def _reindex_index(self, new_index, method, copy, level, fill_value=np.nan):
1920-
if level is not None:
1921-
assert(isinstance(new_index, MultiIndex))
19221920
new_index, indexer = self.index.reindex(new_index, method, level)
19231921
return self._reindex_with_indexers(new_index, indexer, None, None,
19241922
copy, fill_value)
19251923

19261924
def _reindex_columns(self, new_columns, copy, level, fill_value=np.nan):
1927-
if level is not None:
1928-
assert(isinstance(new_columns, MultiIndex))
19291925
new_columns, indexer = self.columns.reindex(new_columns, level=level)
19301926
return self._reindex_with_indexers(None, None, new_columns, indexer,
19311927
copy, fill_value)
@@ -1934,6 +1930,7 @@ def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer,
19341930
copy, fill_value):
19351931
new_data = self._data
19361932
if row_indexer is not None:
1933+
row_indexer = com._ensure_int32(row_indexer)
19371934
new_data = new_data.reindex_indexer(index, row_indexer, axis=1,
19381935
fill_value=fill_value)
19391936
elif index is not None and index is not new_data.axes[1]:
@@ -1942,6 +1939,7 @@ def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer,
19421939

19431940
if col_indexer is not None:
19441941
# TODO: speed up on homogeneous DataFrame objects
1942+
col_indexer = com._ensure_int32(col_indexer)
19451943
new_data = new_data.reindex_indexer(columns, col_indexer, axis=0,
19461944
fill_value=fill_value)
19471945
elif columns is not None and columns is not new_data.axes[0]:

pandas/core/index.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -641,8 +641,8 @@ def reindex(self, target, method=None, level=None):
641641
"""
642642
target = _ensure_index(target)
643643
if level is not None:
644-
_, indexer, _ = self._join_level(target, level, how='left',
645-
return_indexers=True)
644+
_, indexer, _ = self._join_level(target, level, how='right',
645+
return_indexers=True)
646646
else:
647647
if self.equals(target):
648648
indexer = None
@@ -726,7 +726,6 @@ def _join_level(self, other, level, how='left', return_indexers=False):
726726
how = {'right': 'left', 'left': 'right'}.get(how, how)
727727

728728
level = left._get_level_number(level)
729-
730729
old_level = left.levels[level]
731730

732731
new_level, left_lev_indexer, right_lev_indexer = \
@@ -750,10 +749,10 @@ def _join_level(self, other, level, how='left', return_indexers=False):
750749

751750
join_index = MultiIndex(levels=new_levels, labels=new_labels,
752751
names=left.names)
752+
left_indexer = np.arange(len(left))[new_lev_labels != -1]
753753
else:
754754
join_index = left
755-
756-
left_indexer = None
755+
left_indexer = None
757756

758757
if right_lev_indexer is not None:
759758
right_indexer = right_lev_indexer.take(join_index.labels[level])
@@ -1682,7 +1681,7 @@ def reindex(self, target, method=None, level=None):
16821681
(new_index, indexer, mask) : (MultiIndex, ndarray, ndarray)
16831682
"""
16841683
if level is not None:
1685-
target, _, indexer = self._join_level(target, level, how='left',
1684+
target, indexer, _ = self._join_level(target, level, how='right',
16861685
return_indexers=True)
16871686
else:
16881687
if self.equals(target):

pandas/core/indexing.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def _getitem_axis(self, key, axis=0):
170170

171171
if hasattr(key, 'ndim') and key.ndim > 1:
172172
raise ValueError('Cannot index with multidimensional key')
173-
173+
174174
return self._getitem_iterable(key, axis=axis)
175175
elif axis == 0:
176176
is_int_index = _is_integer_index(labels)
@@ -197,11 +197,19 @@ def _getitem_axis(self, key, axis=0):
197197

198198
def _getitem_iterable(self, key, axis=0):
199199
labels = self.obj._get_axis(axis)
200-
axis_name = self.obj._get_axis_name(axis)
200+
# axis_name = self.obj._get_axis_name(axis)
201+
202+
def _reindex(keys, level=None):
203+
try:
204+
return self.obj.reindex_axis(keys, axis=axis, level=level)
205+
except AttributeError:
206+
# Series
207+
assert(axis == 0)
208+
return self.obj.reindex(keys, level=level)
201209

202210
if com._is_bool_indexer(key):
203211
key = _check_bool_indexer(labels, key)
204-
return self.obj.reindex(**{axis_name: labels[np.asarray(key)]})
212+
return _reindex(labels[np.asarray(key)])
205213
else:
206214
if isinstance(key, Index):
207215
# want Index objects to pass through untouched
@@ -212,7 +220,14 @@ def _getitem_iterable(self, key, axis=0):
212220
if _is_integer_dtype(keyarr) and not _is_integer_index(labels):
213221
keyarr = labels.take(keyarr)
214222

215-
return self.obj.reindex(**{axis_name: keyarr})
223+
# heuristic (not the most robust): if the axis is a MultiIndex and the first key is not a tuple, treat the keys as labels of level 0
224+
if (isinstance(labels, MultiIndex) and
225+
not isinstance(keyarr[0], tuple)):
226+
level = 0
227+
else:
228+
level = None
229+
230+
return _reindex(keyarr, level=level)
216231

217232
def _convert_to_indexer(self, obj, axis=0):
218233
"""

pandas/core/panel.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def _getitem_multilevel(self, key):
337337
result.index = result_index
338338
else:
339339
new_values = self.values[loc, :, :]
340-
result = Panel(new_values,
340+
result = Panel(new_values,
341341
items=self.items[loc],
342342
major_axis=self.major_axis,
343343
minor_axis=self.minor_axis)
@@ -699,6 +699,37 @@ def reindex(self, major=None, items=None, minor=None, method=None,
699699

700700
return result
701701

702+
def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True):
703+
"""Conform Panel to new index with optional filling logic, placing
704+
NA/NaN in locations having no value in the previous index. A new object
705+
is produced unless the new index is equivalent to the current one and
706+
copy=False
707+
708+
Parameters
709+
----------
710+
labels : array-like
711+
New labels / index to conform to. Preferably an Index object to
712+
avoid duplicating data
713+
axis : {0, 1, 2}
714+
0 -> items
715+
1 -> major_axis, 2 -> minor_axis
716+
method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
717+
Method to use for filling holes in reindexed Panel
718+
pad / ffill: propagate last valid observation forward to next valid
719+
backfill / bfill: use NEXT valid observation to fill gap
720+
copy : boolean, default True
721+
Return a new object, even if the passed indexes are the same
722+
level : int or name
723+
Broadcast across a level, matching Index values on the
724+
passed MultiIndex level (NOTE: currently not forwarded to _reindex_axis, so it has no effect in this method)
725+
726+
Returns
727+
-------
728+
reindexed : Panel
729+
"""
730+
self._consolidate_inplace()
731+
return self._reindex_axis(labels, method, axis, copy)
732+
702733
def reindex_like(self, other, method=None):
703734
"""
704735
Reindex Panel to match indices of another Panel

pandas/core/series.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1906,6 +1906,7 @@ def reindex(self, index=None, method=None, level=None, fill_value=np.nan,
19061906

19071907
new_index, fill_vec = self.index.reindex(index, method=method,
19081908
level=level)
1909+
fill_vec = com._ensure_int32(fill_vec)
19091910
new_values = com.take_1d(self.values, fill_vec, fill_value=fill_value)
19101911
return Series(new_values, index=new_index, name=self.name)
19111912

pandas/tests/test_index.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1386,11 +1386,18 @@ def test_reindex_level(self):
13861386
idx = Index(['one'])
13871387

13881388
target, indexer = self.index.reindex(idx, level='second')
1389-
target2, indexer2 = idx.reindex(self.index, idx, level='second')
1389+
target2, indexer2 = idx.reindex(self.index, level='second')
1390+
1391+
exp_index = self.index.join(idx, level='second', how='right')
1392+
exp_index2 = self.index.join(idx, level='second', how='left')
13901393

1391-
exp_index = self.index.join(idx, level='second', how='left')
13921394
self.assert_(target.equals(exp_index))
1393-
self.assert_(target2.equals(exp_index))
1395+
exp_indexer = np.array([0, 2, 4])
1396+
self.assert_(np.array_equal(indexer, exp_indexer))
1397+
1398+
self.assert_(target2.equals(exp_index2))
1399+
exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
1400+
self.assert_(np.array_equal(indexer2, exp_indexer2))
13941401

13951402
def test_has_duplicates(self):
13961403
self.assert_(not self.index.has_duplicates)

pandas/tests/test_multilevel.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def _check_op(opname):
8383
op = getattr(DataFrame, opname)
8484
month_sums = self.ymd.sum(level='month')
8585
result = op(self.ymd, month_sums, level='month')
86+
8687
broadcasted = self.ymd.groupby(level='month').transform(np.sum)
8788
expected = op(self.ymd, broadcasted)
8889
assert_frame_equal(result, expected)
@@ -1188,6 +1189,23 @@ def test_mixed_depth_pop(self):
11881189
assert_frame_equal(expected, result)
11891190
assert_frame_equal(df1, df2)
11901191

1192+
def test_reindex_level_partial_selection(self):
1193+
result = self.frame.reindex(['foo', 'qux'], level=0)
1194+
expected = self.frame.ix[[0, 1, 2, 7, 8, 9]]
1195+
assert_frame_equal(result, expected)
1196+
1197+
result = self.frame.T.reindex_axis(['foo', 'qux'], axis=1, level=0)
1198+
assert_frame_equal(result, expected.T)
1199+
1200+
result = self.frame.ix[['foo', 'qux']]
1201+
assert_frame_equal(result, expected)
1202+
1203+
result = self.frame['A'].ix[['foo', 'qux']]
1204+
assert_series_equal(result, expected['A'])
1205+
1206+
result = self.frame.T.ix[:, ['foo', 'qux']]
1207+
assert_frame_equal(result, expected.T)
1208+
11911209
def test_drop_level(self):
11921210
result = self.frame.drop(['bar', 'qux'], level='first')
11931211
expected = self.frame.ix[[0, 1, 2, 5, 6]]

0 commit comments

Comments
 (0)