Skip to content

Commit 394bb0d

Browse files
committed
ENH: add Panel.take, implement set ops between MultiIndex and Index, plus test coverage
1 parent eddd5c9 commit 394bb0d

12 files changed

+161
-89
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ feedback on the library.
128128
- Added `pivot_table` convenience function to pandas namespace (GH #234)
129129
- Implemented `Panel.rename_axis` function (GH #243)
130130
- DataFrame will show index level names in console output
131+
- Implemented `Panel.take`
131132

132133
**Improvements to existing features**
133134

@@ -189,6 +190,7 @@ feedback on the library.
189190
issue GH #262
190191
- Can pass list of tuples to `Series` (GH #270)
191192
- Can pass level name to `DataFrame.stack`
193+
- Support set operations between MultiIndex and Index
192194

193195
Thanks
194196
------

pandas/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
try:
1010
import pandas._tseries as lib
11-
except Exception, e:
11+
except Exception, e: # pragma: no cover
1212
if 'No module named' in e.message:
1313
raise ImportError('C extensions not built: if you installed already '
1414
'verify that you are not importing from the source '

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2308,7 +2308,7 @@ def count(self, axis=0, level=None, numeric_only=False):
23082308
else:
23092309
frame = self
23102310

2311-
result = frame.apply(Series.count, axis=axis)
2311+
result = DataFrame.apply(frame, Series.count, axis=axis)
23122312

23132313
# what happens with empty DataFrame
23142314
if isinstance(result, DataFrame):

pandas/core/generic.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def sort_index(self, axis=0, ascending=True):
204204
def ix(self):
205205
raise NotImplementedError
206206

207-
def reindex(self, **kwds):
207+
def reindex(self, *args, **kwds):
208208
raise NotImplementedError
209209

210210
class NDFrame(PandasObject):
@@ -486,3 +486,25 @@ def rename_axis(self, mapper, axis=0, copy=True):
486486
new_data = new_data.copy()
487487

488488
return self._constructor(new_data)
489+
490+
def take(self, indices, axis=0):
491+
"""
492+
Analogous to ndarray.take
493+
494+
Parameters
495+
----------
496+
indices : list / array of ints
497+
axis : int, default 0
498+
499+
Returns
500+
-------
501+
taken : type of caller
502+
"""
503+
if axis == 0:
504+
labels = self._get_axis(axis)
505+
new_items = labels.take(indices)
506+
new_data = self._data.reindex_items(new_items)
507+
else:
508+
new_data = self._data.take(indices, axis=axis)
509+
return self._constructor(new_data)
510+

pandas/core/index.py

+35-45
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ class Index(np.ndarray):
3939
----
4040
An Index instance can **only** contain hashable objects
4141
"""
42+
_map_indices = lib.map_indices_object
43+
_is_monotonic = lib.is_monotonic_object
44+
_groupby = lib.groupby_object
45+
_arrmap = lib.arrmap_object
46+
4247
name = None
4348
def __new__(cls, data, dtype=None, copy=False, name=None):
4449
if isinstance(data, np.ndarray):
@@ -67,6 +72,10 @@ def dtype(self):
6772
def nlevels(self):
6873
return 1
6974

75+
@property
76+
def _constructor(self):
77+
return Index
78+
7079
def summary(self):
7180
if len(self) > 0:
7281
index_summary = ', %s to %s' % (str(self[0]), str(self[-1]))
@@ -82,15 +91,16 @@ def values(self):
8291

8392
@cache_readonly
8493
def is_monotonic(self):
85-
return lib.is_monotonic_object(self)
94+
return self._is_monotonic(self)
8695

8796
_indexMap = None
8897
_integrity = False
98+
8999
@property
90100
def indexMap(self):
91101
"{label -> location}"
92102
if self._indexMap is None:
93-
self._indexMap = lib.map_indices_object(self)
103+
self._indexMap = self._map_indices(self)
94104
self._integrity = len(self._indexMap) == len(self)
95105

96106
if not self._integrity:
@@ -185,7 +195,7 @@ def take(self, *args, **kwargs):
185195
Analogous to ndarray.take
186196
"""
187197
taken = self.view(np.ndarray).take(*args, **kwargs)
188-
return Index(taken, name=self.name)
198+
return self._constructor(taken, name=self.name)
189199

190200
def format(self, name=False):
191201
"""
@@ -305,7 +315,7 @@ def union(self, other):
305315
return _ensure_index(other)
306316

307317
if self.is_monotonic and other.is_monotonic:
308-
result = lib.outer_join_indexer_object(self, other)[0]
318+
result = lib.outer_join_indexer_object(self, other.values)[0]
309319
else:
310320
indexer = self.get_indexer(other)
311321
indexer = (indexer == -1).nonzero()[0]
@@ -356,9 +366,10 @@ def intersection(self, other):
356366
other = other.astype(object)
357367

358368
if self.is_monotonic and other.is_monotonic:
359-
return Index(lib.inner_join_indexer_object(self, other)[0])
369+
return Index(lib.inner_join_indexer_object(self,
370+
other.values)[0])
360371
else:
361-
indexer = self.get_indexer(other)
372+
indexer = self.get_indexer(other.values)
362373
indexer = indexer.take((indexer != -1).nonzero()[0])
363374
return self.take(indexer)
364375

@@ -446,10 +457,10 @@ def get_indexer(self, target, method=None):
446457
return indexer
447458

448459
def groupby(self, to_groupby):
449-
return lib.groupby_object(self.values, to_groupby)
460+
return self._groupby(self.values, to_groupby)
450461

451462
def map(self, mapper):
452-
return lib.arrmap_object(self.values, mapper)
463+
return self._arrmap(self.values, mapper)
453464

454465
def _get_method(self, method):
455466
if method:
@@ -621,6 +632,11 @@ def copy(self, order='C'):
621632

622633
class Int64Index(Index):
623634

635+
_map_indices = lib.map_indices_int64
636+
_is_monotonic = lib.is_monotonic_int64
637+
_groupby = lib.groupby_int64
638+
_arrmap = lib.arrmap_int64
639+
624640
def __new__(cls, data, dtype=None, copy=False, name=None):
625641
if not isinstance(data, np.ndarray):
626642
if np.isscalar(data):
@@ -648,29 +664,17 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
648664
subarr.name = name
649665
return subarr
650666

667+
@property
668+
def _constructor(self):
669+
return Int64Index
670+
651671
def astype(self, dtype):
652672
return Index(self.values.astype(dtype))
653673

654674
@property
655675
def dtype(self):
656676
return np.dtype('int64')
657677

658-
@cache_readonly
659-
def is_monotonic(self):
660-
return lib.is_monotonic_int64(self)
661-
662-
@property
663-
def indexMap(self):
664-
"{label -> location}"
665-
if self._indexMap is None:
666-
self._indexMap = lib.map_indices_int64(self)
667-
self._integrity = len(self._indexMap) == len(self)
668-
669-
if not self._integrity:
670-
raise Exception('Index cannot contain duplicate values!')
671-
672-
return self._indexMap
673-
674678
def is_all_dates(self):
675679
"""
676680
Checks that all the labels are datetime objects
@@ -771,19 +775,6 @@ def union(self, other):
771775
return Int64Index(result)
772776
union.__doc__ = Index.union.__doc__
773777

774-
def groupby(self, to_groupby):
775-
return lib.groupby_int64(self, to_groupby)
776-
777-
def map(self, mapper):
778-
return lib.arrmap_int64(self, mapper)
779-
780-
def take(self, *args, **kwargs):
781-
"""
782-
Analogous to ndarray.take
783-
"""
784-
taken = self.values.take(*args, **kwargs)
785-
return Int64Index(taken, name=self.name)
786-
787778
class DateIndex(Index):
788779
pass
789780

@@ -1267,16 +1258,9 @@ def get_indexer(self, target, method=None):
12671258
"""
12681259
method = self._get_method(method)
12691260

1261+
target_index = target
12701262
if isinstance(target, MultiIndex):
12711263
target_index = target.get_tuple_index()
1272-
else:
1273-
if len(target) > 0:
1274-
val = target[0]
1275-
if not isinstance(val, tuple) or len(val) != self.nlevels:
1276-
raise ValueError('can only pass MultiIndex or '
1277-
'array of tuples')
1278-
1279-
target_index = target
12801264

12811265
self_index = self.get_tuple_index()
12821266

@@ -1509,6 +1493,9 @@ def union(self, other):
15091493
-------
15101494
Index
15111495
"""
1496+
if not isinstance(other, MultiIndex):
1497+
return other.union(self)
1498+
15121499
self._assert_can_do_setop(other)
15131500

15141501
if len(other) == 0 or self.equals(other):
@@ -1533,6 +1520,9 @@ def intersection(self, other):
15331520
-------
15341521
Index
15351522
"""
1523+
if not isinstance(other, MultiIndex):
1524+
return other.intersection(self)
1525+
15361526
self._assert_can_do_setop(other)
15371527

15381528
if self.equals(other):

pandas/core/internals.py

+10-19
Original file line numberDiff line numberDiff line change
@@ -176,31 +176,19 @@ def should_store(self, value):
176176
# unnecessarily
177177
return issubclass(value.dtype.type, np.floating)
178178

179-
def can_store(self, value):
180-
return issubclass(value.dtype.type, (np.integer, np.floating))
181-
182179
class IntBlock(Block):
183180

184181
def should_store(self, value):
185-
return self.can_store(value)
186-
187-
def can_store(self, value):
188182
return issubclass(value.dtype.type, np.integer)
189183

190184
class BoolBlock(Block):
191185

192186
def should_store(self, value):
193-
return self.can_store(value)
194-
195-
def can_store(self, value):
196187
return issubclass(value.dtype.type, np.bool_)
197188

198189
class ObjectBlock(Block):
199190

200191
def should_store(self, value):
201-
return self.can_store(value)
202-
203-
def can_store(self, value):
204192
return not issubclass(value.dtype.type,
205193
(np.integer, np.floating, np.bool_))
206194

@@ -676,21 +664,24 @@ def reindex_items(self, new_items):
676664

677665
return BlockManager(new_blocks, new_axes)
678666

679-
def take(self, indexer, axis=1, pandas_indexer=False):
667+
def take(self, indexer, axis=1):
680668
if axis == 0:
681669
raise NotImplementedError
682670

683-
if pandas_indexer:
684-
take_f = lambda arr: common.take_fast(arr, indexer,
685-
None, False, axis=axis)
686-
else:
687-
take_f = lambda arr: arr.take(indexer, axis=axis)
671+
indexer = np.asarray(indexer, dtype='i4')
672+
673+
n = len(self.axes[axis])
674+
if ((indexer == -1) | (indexer >= n)).any():
675+
raise Exception('Indices must be nonzero and less than '
676+
'the axis length')
688677

689678
new_axes = list(self.axes)
690679
new_axes[axis] = self.axes[axis].take(indexer)
691680
new_blocks = []
692681
for blk in self.blocks:
693-
newb = make_block(take_f(blk.values), blk.items, self.items)
682+
new_values = common.take_fast(blk.values, indexer,
683+
None, False, axis=axis)
684+
newb = make_block(new_values, blk.items, self.items)
694685
new_blocks.append(newb)
695686

696687
return BlockManager(new_blocks, new_axes)

pandas/core/panel.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,8 @@ def fillna(self, value=None, method='pad'):
665665

666666
try:
667667
divide = div = _panel_arith_method(operator.div, 'divide')
668-
except AttributeError: # Python 3
668+
except AttributeError: # pragma: no cover
669+
# Python 3
669670
divide = div = _panel_arith_method(operator.truediv, 'divide')
670671

671672
def major_xs(self, key, copy=True):
@@ -1235,7 +1236,8 @@ def _combine_panel_frame(self, other, func, axis='items'):
12351236

12361237
try:
12371238
divide = div = _panel_arith_method(operator.div, 'divide')
1238-
except AttributeError: # Python 3
1239+
except AttributeError: # pragma: no cover
1240+
# Python 3
12391241
divide = div = _panel_arith_method(operator.truediv, 'divide')
12401242

12411243
def to_wide(self):

pandas/core/reshape.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,9 @@ def stack(frame, level=-1, dropna=True):
287287
stacked : Series
288288
"""
289289
N, K = frame.shape
290+
if isinstance(level, int) and level < 0:
291+
level += frame.columns.nlevels
292+
290293
level = frame.columns._get_level_number(level)
291294

292295
if isinstance(frame.columns, MultiIndex):
@@ -318,8 +321,6 @@ def stack(frame, level=-1, dropna=True):
318321

319322
def _stack_multi_columns(frame, level=-1, dropna=True):
320323
this = frame.copy()
321-
if level < 0:
322-
level += frame.columns.nlevels
323324

324325
# this makes life much simpler
325326
if level != frame.columns.nlevels - 1:

0 commit comments

Comments
 (0)