Skip to content

Commit 65c0cf1

Browse files
committed
ENH: start enabling proper fancy indexing with multi-level index
1 parent 4109135 commit 65c0cf1

File tree

5 files changed

+67
-67
lines changed

5 files changed

+67
-67
lines changed

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -951,7 +951,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None):
951951
how : {'any', 'all'}
952952
any : if any NA values are present, drop that label
953953
all : if all values are NA, drop that label
954-
thresh : {'any', 'all', int}
954+
thresh : int, default None
955955
int value : require that many non-NA values
956956
subset : array-like
957957

pandas/core/index.py

+23-11
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,6 @@ class MultiIndex(Index):
375375
"""
376376
Implements multi-level, a.k.a. hierarchical, index object for pandas objects
377377
378-
379378
Parameters
380379
----------
381380
levels : list or tuple of arrays
@@ -559,29 +558,42 @@ def get_tuple_index(self):
559558
def slice_locs(self, start=None, end=None):
560559
"""
561560
562-
563561
Returns
564562
-------
565563
566564
Notes
567565
-----
568-
This function assumes that the data is sorted, so use at your own peril
566+
This function assumes that the data is sorted by the first level
569567
"""
568+
level0 = self.levels[0]
569+
570570
if start is None:
571-
beg_slice = 0
572-
elif start in self:
573-
beg_slice = self.indexMap[start]
571+
start_slice = 0
572+
elif isinstance(start, tuple):
573+
pass
574574
else:
575-
beg_slice = self.searchsorted(start, side='left')
575+
try:
576+
start_label = level0.indexMap[start]
577+
except KeyError:
578+
start_label = level0.searchsorted(start)
579+
580+
start_slice = self.labels[0].searchsorted(start_label)
576581

577582
if end is None:
578583
end_slice = len(self)
579-
elif end in self.indexMap:
580-
end_slice = self.indexMap[end] + 1
584+
elif isinstance(end, tuple):
585+
pass
581586
else:
582-
end_slice = self.searchsorted(end, side='right')
587+
try:
588+
end_label = level0.indexMap[end]
589+
except KeyError:
590+
end_label = level0.searchsorted(end, side='right')
591+
if end_label > 0:
592+
end_label -= 1
583593

584-
return beg_slice, end_slice
594+
end_slice = self.labels[0].searchsorted(end_label, side='right')
595+
596+
return start_slice, end_slice
585597

586598
def truncate(self, before=None, after=None):
587599
"""

pandas/core/indexing.py

-5
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,4 @@ def crit(x):
298298
def _need_slice(obj):
299299
return obj.start is not None or obj.stop is not None
300300

301-
# I don't think this is necessary
302-
# def _check_step(obj):
303-
# if obj.step is not None and obj.step != 1:
304-
# raise Exception('steps other than 1 are not supported')
305-
306301
_isboolarr = lambda x: np.asarray(x).dtype == np.bool_

pandas/core/series.py

+38-41
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,15 @@ def wrapper(self, other):
4141
if self.index.equals(other.index):
4242
return Series(op(self.values, other.values), index=self.index)
4343

44-
newIndex = self.index + other.index
45-
46-
try:
47-
this_reindexed = self.reindex(newIndex)
48-
other_reindexed = other.reindex(newIndex)
49-
arr = op(this_reindexed.values, other_reindexed.values)
50-
except Exception:
51-
arr = Series.combine(self, other, getattr(type(self[0]), name))
52-
result = Series(arr, index=newIndex)
53-
return result
44+
new_index = self.index + other.index
45+
this_reindexed = self.reindex(new_index)
46+
other_reindexed = other.reindex(new_index)
47+
arr = op(this_reindexed.values, other_reindexed.values)
48+
return Series(arr, index=new_index)
5449
elif isinstance(other, DataFrame):
5550
return NotImplemented
5651
else:
52+
# scalars
5753
return Series(op(self.values, other), index=self.index)
5854
return wrapper
5955

@@ -84,7 +80,7 @@ def f(self, other, fill_value=None):
8480

8581
class Series(np.ndarray, PandasObject):
8682
"""
87-
Generic indexed (labeled) vector (time series or cross-section)
83+
Generic indexed (labeled) vector, including time series
8884
8985
Contains values in a numpy-ndarray with an optional bound index
9086
(also an array of dates, strings, or whatever you want the 'row
@@ -124,7 +120,7 @@ class Series(np.ndarray, PandasObject):
124120

125121
_AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems())
126122

127-
def __new__(cls, data, index=None, dtype=None, copy=False):
123+
def __new__(cls, data, index=None, dtype=None, name=None, copy=False):
128124
if isinstance(data, Series):
129125
if index is None:
130126
index = data.index
@@ -178,6 +174,7 @@ def __new__(cls, data, index=None, dtype=None, copy=False):
178174
# Change the class of the array to be the subclass type.
179175
subarr = subarr.view(cls)
180176
subarr.index = index
177+
subarr.name = name
181178

182179
if subarr.index.is_all_dates():
183180
subarr = subarr.view(TimeSeries)
@@ -271,12 +268,7 @@ def __getitem__(self, key):
271268
except TypeError:
272269
pass
273270

274-
# boolean indexing, need to check that the data are aligned, otherwise
275-
# disallowed
276-
if isinstance(key, Series) and key.dtype == np.bool_:
277-
if not key.index.equals(self.index):
278-
raise Exception('can only boolean index with like-indexed '
279-
'Series or raw ndarrays')
271+
self._check_bool_indexer(key)
280272

281273
def _index_with(indexer):
282274
return Series(self.values[indexer],
@@ -338,7 +330,7 @@ def get(self, key, default=None):
338330
y : scalar
339331
"""
340332
if key in self.index:
341-
return self._get_val_at(self.index.indexMap[key])
333+
return self._get_val_at(self.index.get_loc(key))
342334
else:
343335
return default
344336

@@ -361,7 +353,7 @@ def __getslice__(self, i, j):
361353
def __setitem__(self, key, value):
362354
values = self.values
363355
try:
364-
loc = self.index.indexMap[key]
356+
loc = self.index.get_loc(key)
365357
values[loc] = value
366358
return
367359
except KeyError:
@@ -373,12 +365,7 @@ def __setitem__(self, key, value):
373365
# Could not hash item
374366
pass
375367

376-
# boolean indexing, need to check that the data are aligned, otherwise
377-
# disallowed
378-
if isinstance(key, Series) and key.dtype == np.bool_:
379-
if not key.index.equals(self.index):
380-
raise Exception('can only boolean index with like-indexed '
381-
'Series or raw ndarrays')
368+
self._check_bool_indexer(key)
382369

383370
# special handling of boolean data with NAs stored in object
384371
# arrays. Sort of an elaborate hack since we can't represent boolean
@@ -396,6 +383,14 @@ def __setitem__(self, key, value):
396383

397384
values[key] = value
398385

386+
def _check_bool_indexer(self, key):
387+
# boolean indexing, need to check that the data are aligned, otherwise
388+
# disallowed
389+
if isinstance(key, Series) and key.dtype == np.bool_:
390+
if not key.index.equals(self.index):
391+
raise Exception('can only boolean index with like-indexed '
392+
'Series or raw ndarrays')
393+
399394
def __setslice__(self, i, j, value):
400395
"""Set slice equal to given value(s)"""
401396
ndarray.__setslice__(self, i, j, value)
@@ -743,12 +738,12 @@ def append(self, other):
743738
-------
744739
y : Series
745740
"""
746-
newIndex = np.concatenate((self.index, other.index))
747-
newIndex = Index(newIndex)
748-
newIndex._verify_integrity()
741+
new_index = np.concatenate((self.index, other.index))
742+
new_index = Index(new_index)
743+
new_index._verify_integrity()
749744

750745
new_values = np.concatenate((self, other))
751-
return Series(new_values, index=newIndex)
746+
return Series(new_values, index=new_index)
752747

753748
def _binop(self, other, func, fill_value=None):
754749
"""
@@ -811,17 +806,17 @@ def combine(self, other, func, fill_value=nan):
811806
result : Series
812807
"""
813808
if isinstance(other, Series):
814-
newIndex = self.index + other.index
809+
new_index = self.index + other.index
815810

816-
new_values = np.empty(len(newIndex), dtype=self.dtype)
817-
for i, idx in enumerate(newIndex):
811+
new_values = np.empty(len(new_index), dtype=self.dtype)
812+
for i, idx in enumerate(new_index):
818813
new_values[i] = func(self.get(idx, fill_value),
819814
other.get(idx, fill_value))
820815
else:
821-
newIndex = self.index
816+
new_index = self.index
822817
new_values = func(self.values, other)
823818

824-
return Series(new_values, index=newIndex)
819+
return Series(new_values, index=new_index)
825820

826821
def combineFirst(self, other):
827822
"""
@@ -837,16 +832,16 @@ def combineFirst(self, other):
837832
formed as union of two Series
838833
"""
839834
if self.index.equals(other.index):
840-
newIndex = self.index
835+
new_index = self.index
841836
# save ourselves the copying in this case
842837
this = self
843838
else:
844-
newIndex = self.index + other.index
839+
new_index = self.index + other.index
845840

846-
this = self.reindex(newIndex)
847-
other = other.reindex(newIndex)
841+
this = self.reindex(new_index)
842+
other = other.reindex(new_index)
848843

849-
result = Series(np.where(isnull(this), other, this), index=newIndex)
844+
result = Series(np.where(isnull(this), other, this), index=new_index)
850845
return result
851846

852847
#----------------------------------------------------------------------
@@ -896,7 +891,9 @@ def _try_mergesort(arr):
896891
# stable sort not available for object dtype
897892
return arr.argsort()
898893

899-
if 'missingAtEnd' in kwds:
894+
if 'missingAtEnd' in kwds: # pragma: no cover
895+
warnings.warn("missingAtEnd is deprecated, use na_last",
896+
FutureWarning)
900897
na_last = kwds['missingAtEnd']
901898

902899
arr = self.values

pandas/tests/test_series.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -451,11 +451,11 @@ def test_operators_corner(self):
451451
result = empty + Series([], index=Index([]))
452452
self.assert_(len(result) == 0)
453453

454-
deltas = Series([timedelta(1)] * 5, index=np.arange(5))
455-
sub_deltas = deltas[::2]
456-
457-
deltas5 = deltas * 5
458-
deltas = deltas + sub_deltas
454+
# TODO: this returned NotImplemented earlier, what to do?
455+
# deltas = Series([timedelta(1)] * 5, index=np.arange(5))
456+
# sub_deltas = deltas[::2]
457+
# deltas5 = deltas * 5
458+
# deltas = deltas + sub_deltas
459459

460460
# float + int
461461
int_ts = self.ts.astype(int)[:-5]
@@ -627,7 +627,6 @@ def test_sort(self):
627627
self.assert_(np.array_equal(ts.index, self.ts.order().index))
628628

629629
def test_order(self):
630-
631630
ts = self.ts.copy()
632631
ts[:5] = np.NaN
633632
vals = ts.values
@@ -640,9 +639,6 @@ def test_order(self):
640639
self.assert_(np.isnan(result[:5]).all())
641640
self.assert_(np.array_equal(result[5:], np.sort(vals[5:])))
642641

643-
# just want to make sure it works
644-
result = ts.order(missingAtEnd=False)
645-
646642
# something object-type
647643
ser = Series(['A', 'B'], [1, 2])
648644
# no failure

0 commit comments

Comments
 (0)