Skip to content

Commit 5dfb2ee

Browse files
committed
ENH: reindex_multi in Panel per #979
1 parent a66543b commit 5dfb2ee

File tree

5 files changed

+68
-13
lines changed

5 files changed

+68
-13
lines changed

RELEASE.rst

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ pandas 0.8.1
5252
- Improve performance of array-of-Period to PeriodIndex, convert such arrays
5353
to PeriodIndex inside Index (#1215)
5454
- More informative string representation for weekly Period objects (#1503)
55+
- Accelerate 3-axis multi data selection from homogeneous Panel (#979)
5556

5657
**Bug fixes**
5758

pandas/core/common.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def take_1d(arr, indexer, out=None, fill_value=np.nan):
254254

255255
return out
256256

257-
def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan):
257+
def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan, out=None):
258258

259259
dtype_str = arr.dtype.name
260260

@@ -267,21 +267,26 @@ def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan):
267267

268268
if needs_masking:
269269
return take_2d_multi(_maybe_upcast(arr), row_idx, col_idx,
270-
fill_value=fill_value)
270+
fill_value=fill_value, out=out)
271271
else:
272-
out = np.empty(out_shape, dtype=arr.dtype)
272+
if out is None:
273+
out = np.empty(out_shape, dtype=arr.dtype)
273274
take_f = _get_take2d_function(dtype_str, axis='multi')
274275
take_f(arr, _ensure_int64(row_idx),
275276
_ensure_int64(col_idx), out=out,
276277
fill_value=fill_value)
277278
return out
278279
elif dtype_str in ('float64', 'object', 'datetime64[ns]'):
279-
out = np.empty(out_shape, dtype=arr.dtype)
280+
if out is None:
281+
out = np.empty(out_shape, dtype=arr.dtype)
280282
take_f = _get_take2d_function(dtype_str, axis='multi')
281283
take_f(arr, _ensure_int64(row_idx), _ensure_int64(col_idx), out=out,
282284
fill_value=fill_value)
283285
return out
284286
else:
287+
if out is not None:
288+
raise ValueError('Cannot pass out in this case')
289+
285290
return take_2d(take_2d(arr, row_idx, axis=0, fill_value=fill_value),
286291
col_idx, axis=1, fill_value=fill_value)
287292

pandas/core/indexing.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -141,26 +141,34 @@ def _getitem_tuple(self, tup):
141141
return retval
142142

143143
def _multi_take_opportunity(self, tup):
    """Decide whether ``tup`` can be served by the ``_multi_take`` fast path.

    Parameters
    ----------
    tup : tuple
        Per-axis indexers, in axis order.

    Returns
    -------
    bool
        True only when ``self.obj`` is an NDFrame, ``tup`` holds exactly one
        list-like indexer per axis, and none of the object's axes is a
        MultiIndex. False means the caller should use its general path.
    """
    from pandas.core.generic import NDFrame

    # ugly hack for GH #836
    if not isinstance(self.obj, NDFrame):
        return False

    axes = self.obj._data.axes

    # _multi_take reads tup positionally (tup[0], tup[1], tup[2]), so a
    # tuple that does not supply one indexer per axis would raise
    # IndexError there; send those through the general path instead.
    if len(tup) != len(axes):
        return False

    if not all(_is_list_like(x) for x in tup):
        return False

    # just too complicated
    for ax in axes:
        if isinstance(ax, MultiIndex):
            return False

    return True
159159

160160
def _multi_take(self, tup):
    """Select along every axis at once by reindexing ``self.obj``.

    Each entry of ``tup`` is passed through ``_convert_for_reindex`` for
    its axis and the converted keys are handed to the object's ``reindex``.

    Parameters
    ----------
    tup : tuple
        One indexer per axis: ``(index, columns)`` for a DataFrame,
        ``(items, major, minor)`` for a Panel.

    Returns
    -------
    The result of ``self.obj.reindex(...)`` on the converted keys.

    Raises
    ------
    TypeError
        If ``self.obj`` is neither a DataFrame nor a Panel.
    """
    from pandas.core.frame import DataFrame
    from pandas.core.panel import Panel

    if isinstance(self.obj, DataFrame):
        index = self._convert_for_reindex(tup[0], axis=0)
        columns = self._convert_for_reindex(tup[1], axis=1)
        return self.obj.reindex(index=index, columns=columns)
    elif isinstance(self.obj, Panel):
        conv = [self._convert_for_reindex(x, axis=i)
                for i, x in enumerate(tup)]
        # Pass the converted keys, as the DataFrame branch does; the raw
        # tup entries were previously forwarded and conv was discarded.
        return self.obj.reindex(items=conv[0], major=conv[1],
                                minor=conv[2])

    # Fail loudly rather than falling off the end and returning None.
    raise TypeError('multi-axis take is not supported for %s'
                    % type(self.obj).__name__)
164172

165173
def _convert_for_reindex(self, key, axis=0):
166174
labels = self.obj._get_axis(axis)

pandas/core/panel.py

+30
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,10 @@ def reindex(self, major=None, items=None, minor=None, method=None,
682682
major = _mut_exclusive(major, major_axis)
683683
minor = _mut_exclusive(minor, minor_axis)
684684

685+
if (method is None and not self._is_mixed_type and
686+
com._count_not_none(items, major, minor) == 3):
687+
return self._reindex_multi(items, major, minor)
688+
685689
if major is not None:
686690
result = result._reindex_axis(major, method, 1, copy)
687691

@@ -696,6 +700,32 @@ def reindex(self, major=None, items=None, minor=None, method=None,
696700

697701
return result
698702

703+
def _reindex_multi(self, items, major, minor):
    """Reindex all three axes in one pass, writing into a preallocated array.

    Parameters
    ----------
    items, major, minor : sequences of labels
        Target labels for the items, major and minor axes; all three must
        be given (their lengths define the output shape).

    Returns
    -------
    Panel
        A new Panel built from the taken values and the axes returned by
        each ``Index.reindex`` call.
    """
    shape = (len(items), len(major), len(minor))

    values = self.values
    # Output keeps the dtype of the source values.
    # NOTE(review): if take_2d_multi needs to upcast (e.g. integer data with
    # missing labels), this buffer would have the wrong dtype -- confirm.
    result = np.empty(shape, dtype=values.dtype)

    new_items, item_indexer = self.items.reindex(items)
    new_major, major_indexer = self.major_axis.reindex(major)
    new_minor, minor_indexer = self.minor_axis.reindex(minor)

    # A None indexer is replaced by the identity positions 0..n-1
    # (presumably reindex returns None when the axis is unchanged -- confirm).
    if item_indexer is None:
        item_indexer = range(len(new_items))

    if major_indexer is None:
        major_indexer = range(len(new_major))

    if minor_indexer is None:
        minor_indexer = range(len(new_minor))

    # One 2-D take per output item, written directly into the result slab.
    # NOTE(review): if the item indexer marks absent labels with -1 (not
    # visible here), values[-1] selects the last item instead of a fill --
    # verify against Index.reindex.
    for out_pos, src_pos in enumerate(item_indexer):
        com.take_2d_multi(values[src_pos], major_indexer, minor_indexer,
                          out=result[out_pos])

    return Panel(result, items=new_items, major_axis=new_major,
                 minor_axis=new_minor)
728+
699729
def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True):
700730
"""Conform Panel to new index with optional filling logic, placing
701731
NA/NaN in locations having no value in the previous index. A new object

vb_suite/indexing.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,18 @@
103103
midx = MultiIndex.from_arrays([a, b])
104104
midx = midx.take(np.random.permutation(np.arange(100000)))
105105
"""
106-
sort_level_zero = Benchmark("midx.sortlevel(0)", setup,
106+
sort_level_zero = Benchmark("midx.sortlevel(0)", setup,
107107
start_date=datetime(2012,1,1))
108108
sort_level_one = Benchmark("midx.sortlevel(1)", setup,
109109
start_date=datetime(2012,1,1))
110+
111+
#----------------------------------------------------------------------
112+
# Panel subset selection
113+
114+
setup = common_setup + """
115+
p = Panel(np.random.randn(500, 500, 500))
116+
inds = range(0, 500, 10)
117+
"""
118+
119+
indexing_panel_subset = Benchmark('p.ix[inds, inds, inds]', setup,
120+
start_date=datetime(2012, 1, 1))

0 commit comments

Comments
 (0)