Skip to content

Commit 0f59729

Browse files
committed
ENH: AmbiguousIndexError not raised anymore, prefer label-based indexing, other multi-level compatibility
1 parent 084bbcb commit 0f59729

File tree

8 files changed

+159
-123
lines changed

8 files changed

+159
-123
lines changed

pandas/core/common.py

+7
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,13 @@ def _pfixed(s, space, nanRep=None, float_format=None):
200200
else:
201201
return (' %s' % s)[:space].ljust(space)
202202

203+
def _stringify(col):
    """
    Render a label as a string.

    Tuples are passed through ``str`` because ``'%s' % some_tuple`` would
    treat the tuple as the list of format arguments rather than as the value
    to print. Every other value is interpolated with ``'%s'`` (kept as the
    original "unicode workaround").
    """
    if not isinstance(col, tuple):
        return '%s' % col
    return str(col)
209+
203210
def _format(s, nanRep=None, float_format=None):
204211
if isinstance(s, float):
205212
if nanRep is not None and isnull(s):

pandas/core/frame.py

+28-44
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@
2222

2323
from pandas.core.common import (isnull, notnull, PandasError, _ensure_index,
2424
_try_sort, _pfixed, _default_index,
25-
_infer_dtype)
25+
_infer_dtype, _stringify)
2626
from pandas.core.daterange import DateRange
2727
from pandas.core.generic import AxisProperty, NDFrame
2828
from pandas.core.index import Index, MultiIndex, NULL_INDEX
29+
from pandas.core.indexing import _DataFrameIndexer, _maybe_droplevels
2930
from pandas.core.internals import BlockManager, make_block, form_blocks
3031
from pandas.core.series import Series, _is_bool_indexer
3132
import pandas.core.common as common
@@ -242,6 +243,15 @@ def axes(self):
242243
def _constructor(self):
243244
return DataFrame
244245

246+
# Fancy indexing
247+
_ix = None
248+
@property
def ix(self):
    """
    Fancy-indexing accessor for this frame.

    The ``_DataFrameIndexer`` wrapping ``self`` is created on first access
    and stored on ``self._ix``; later accesses return that stored object.
    """
    indexer = self._ix
    if indexer is None:
        # First access: build the indexer and remember it on the instance.
        indexer = self._ix = _DataFrameIndexer(self)
    return indexer
254+
245255
#----------------------------------------------------------------------
246256
# Class behavior
247257

@@ -502,7 +512,7 @@ def _myformat(v):
502512
if formatters is None:
503513
formatters = {}
504514

505-
def _stringify(col):
515+
def _format_col(col):
506516
formatter = formatters.get(col, _myformat)
507517
return [formatter(x) for x in self[col]]
508518

@@ -519,7 +529,7 @@ def _stringify(col):
519529
else:
520530
(str_index,
521531
str_columns) = self._get_formatted_labels(sparsify=sparsify)
522-
stringified = [str_columns[i] + _stringify(c)
532+
stringified = [str_columns[i] + _format_col(c)
523533
for i, c in enumerate(columns)]
524534
to_write.append(adjoin(1, str_index, *stringified))
525535

@@ -567,13 +577,6 @@ def info(self, verbose=True, buf=sys.stdout):
567577

568578
cols = self.columns
569579

570-
def _stringify(col):
571-
# unicode workaround
572-
if isinstance(col, tuple):
573-
return str(col)
574-
else:
575-
return '%s' % col
576-
577580
if verbose:
578581
print >> buf, unicode('Data columns:')
579582
space = max([len(_stringify(k)) for k in self.columns]) + 4
@@ -728,9 +731,7 @@ def __getitem__(self, key):
728731
Examples
729732
--------
730733
column = dm['A']
731-
732734
dmSlice = dm[:20] # First 20 rows
733-
734735
dmSelect = dm[dm.count(axis=1) > 10]
735736
736737
Notes
@@ -752,27 +753,21 @@ def __getitem__(self, key):
752753
new_index = self.index[key]
753754
return self.reindex(new_index)
754755
elif isinstance(self.columns, MultiIndex):
755-
loc = self.columns.get_loc(key)
756-
if isinstance(loc, slice):
757-
new_columns = self.columns[loc]
758-
result = self.reindex(columns=new_columns)
759-
760-
# drop levels
761-
if isinstance(key, tuple):
762-
for _ in key:
763-
new_columns = new_columns.droplevel(0)
764-
else:
765-
new_columns = new_columns.droplevel(0)
766-
767-
result.columns = new_columns
768-
return result
769-
else:
770-
return self._getitem_single(key)
756+
return self._getitem_multilevel(key)
771757
else:
772758
return self._getitem_single(key)
773759

774760
def _getitem_multilevel(self, key):
    """
    Column lookup used when ``self.columns`` is a MultiIndex.

    ``self.columns.get_loc(key)`` is consulted first. When it yields a
    slice, the frame is reindexed to the matching columns and the result's
    column labels are replaced by ``_maybe_droplevels(new_columns, key)``.
    Any other location is delegated to ``_getitem_single``.
    """
    loc = self.columns.get_loc(key)
    if not isinstance(loc, slice):
        return self._getitem_single(key)

    selected_columns = self.columns[loc]
    subframe = self.reindex(columns=selected_columns)

    # HACK: need a more general way of addressing this problem
    subframe.columns = _maybe_droplevels(selected_columns, key)
    return subframe
776771

777772
def _getitem_single(self, key):
778773
values = self._data.get(key)
@@ -907,7 +902,9 @@ def xs(self, key, copy=True):
907902
if new_data.ndim == 1:
908903
return Series(new_data.as_matrix(), index=self.columns)
909904
else:
910-
return DataFrame(new_data)
905+
result = DataFrame(new_data)
906+
result.index = _maybe_droplevels(result.index, key)
907+
return result
911908

912909
#----------------------------------------------------------------------
913910
# Reindexing
@@ -2714,18 +2711,6 @@ def dropIncompleteRows(self, specificColumns=None,
27142711
else:
27152712
return self.dropna(axis=0, subset=specificColumns, thresh=minObs)
27162713

2717-
#----------------------------------------------------------------------
2718-
# Fancy indexing
2719-
2720-
_ix = None
2721-
@property
2722-
def ix(self):
2723-
from pandas.core.indexing import _DataFrameIndexer
2724-
if self._ix is None:
2725-
self._ix = _DataFrameIndexer(self)
2726-
2727-
return self._ix
2728-
27292714

27302715
def group_agg(values, bounds, f):
27312716
"""
@@ -2807,7 +2792,7 @@ def _union_if(index, new_index):
28072792
index = index.union(new_index)
28082793
return index
28092794

2810-
def _get_index(obj):
2795+
def _get_index(v):
28112796
if isinstance(v, Series):
28122797
return v.index
28132798
elif isinstance(v, dict):
@@ -2871,7 +2856,6 @@ def _prep_ndarray(values, copy=True):
28712856

28722857
return values
28732858

2874-
28752859
def _rec_to_dict(arr):
28762860
columns = list(arr.dtype.names)
28772861
sdict = dict((k, arr[k]) for k in columns)

pandas/core/index.py

+13-14
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from pandas.core.common import (_format, adjoin as _adjoin,
8+
from pandas.core.common import (_format, adjoin as _adjoin, _stringify,
99
_ensure_index, _is_bool_indexer)
1010
import pandas.core.common as common
1111
import pandas._tseries as _tseries
@@ -47,9 +47,6 @@ def __new__(cls, data, dtype=object, copy=False):
4747
else:
4848
subarr = np.empty(len(data), dtype=dtype)
4949
subarr[:] = data
50-
51-
# assert(subarr.ndim == 1)
52-
5350
return subarr.view(cls)
5451

5552
def summary(self):
@@ -59,19 +56,21 @@ def summary(self):
5956
index_summary = ''
6057
return 'Index: %s entries%s' % (len(self), index_summary)
6158

59+
_indexMap = None
6260
@property
6361
def indexMap(self):
64-
if not hasattr(self, '_cache_indexMap'):
65-
self._cache_indexMap = _tseries.map_indices_buf(self)
62+
if self._indexMap is None:
63+
self._indexMap = _tseries.map_indices_buf(self)
6664
self._verify_integrity()
6765

68-
return self._cache_indexMap
66+
return self._indexMap
6967

68+
_allDates = None
7069
def is_all_dates(self):
71-
if not hasattr(self, '_cache_allDates'):
72-
self._cache_allDates = _tseries.isAllDates(self)
70+
if self._allDates is None:
71+
self._allDates = _tseries.isAllDates(self)
7372

74-
return self._cache_allDates
73+
return self._allDates
7574

7675
def _verify_integrity(self):
7776
if len(self.indexMap) < len(self):
@@ -128,7 +127,7 @@ def format(self, vertical=False):
128127
to_join.append(dt.strftime("%Y-%m-%d"))
129128
return to_join
130129

131-
return ['%s' % x for x in self]
130+
return [_stringify(x) for x in self]
132131

133132
def equals(self, other):
134133
"""
@@ -425,12 +424,12 @@ def from_tuples(cls, tuples, sortorder=None):
425424

426425
@property
427426
def indexMap(self):
428-
if not hasattr(self, '_cache_indexMap'):
427+
if self._indexMap is None:
429428
zipped = zip(*self.labels)
430-
self._cache_indexMap = _tseries.map_indices_list(zipped)
429+
self._indexMap = _tseries.map_indices_list(zipped)
431430
self._verify_integrity()
432431

433-
return self._cache_indexMap
432+
return self._indexMap
434433

435434
@property
436435
def nlevels(self):

pandas/core/indexing.py

+42-29
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
import numpy as np
22

3-
from pandas.core.frame import DataFrame
4-
from pandas.core.series import Series
5-
63
class _SeriesIndexer(object):
74
"""
85
Class to support fancy indexing, potentially using labels
@@ -33,6 +30,8 @@ def __setitem__(self, key, value):
3330
op()
3431

3532
def _fancy_index(self, key, value=None, operation='get'):
33+
from pandas.core.series import Series
34+
3635
# going to great lengths to avoid code dup
3736
series = self.series
3837

@@ -134,7 +133,7 @@ def _convert_to_indexer(self, obj, axis=0):
134133
Going by Zen of Python?
135134
"In the face of ambiguity, refuse the temptation to guess."
136135
raise AmbiguousIndexError with integer labels?
137-
136+
- No, prefer label-based indexing
138137
"""
139138
index = self.frame._get_axis(axis)
140139
is_int_index = _is_integer_index(index)
@@ -145,29 +144,25 @@ def _convert_to_indexer(self, obj, axis=0):
145144
else:
146145
return obj
147146
elif _is_list_like(obj):
148-
objarr = np.asarray(obj)
147+
objarr = _asarray_tuplesafe(obj)
149148

150-
if _is_integer_dtype(objarr):
151-
if is_int_index:
152-
raise AmbiguousIndexError('integer labels')
153-
154-
# retrieve the indices corresponding
155-
return objarr
156-
elif objarr.dtype == np.bool_:
149+
if objarr.dtype == np.bool_:
157150
if not obj.index.equals(index):
158151
raise Exception('Cannot use boolean index with misaligned '
159152
'or unequal labels')
160153
return objarr
161154
else:
155+
# If have integer labels, defer to label-based indexing
156+
if _is_integer_dtype(objarr) and not is_int_index:
157+
return objarr
158+
162159
indexer, mask = index.get_indexer(objarr)
163160
if not mask.all():
164161
raise KeyError('%s not in index' % objarr[-mask])
165162

166163
return indexer
167164
else:
168-
if _is_int_like(obj):
169-
if is_int_index:
170-
raise AmbiguousIndexError('integer labels')
165+
if _is_int_like(obj) and not is_int_index:
171166
return obj
172167
return index.get_loc(obj)
173168

@@ -189,43 +184,41 @@ def _fancy_getitem_axis(self, key, axis=0):
189184
elif _is_list_like(key):
190185
return self._fancy_getitem(key, axis=axis)
191186
elif axis == 0:
192-
idx = key
193-
if isinstance(key, int):
194-
if _is_integer_index(self.frame.index):
195-
raise AmbiguousIndexError('integer labels')
187+
is_int_index = _is_integer_index(self.frame.index)
196188

189+
idx = key
190+
if _is_int_like(key) and not is_int_index:
197191
idx = self.frame.index[key]
198192

199193
if self.frame._is_mixed_type:
200194
return self.frame.xs(idx)
201195
else:
196+
# get a view if possible
202197
return self.frame.xs(idx, copy=False)
203198
else:
204199
col = key
205-
if isinstance(key, int):
206-
if _is_integer_index(self.frame.columns):
207-
raise AmbiguousIndexError('integer labels')
200+
if _is_int_like(key) and not _is_integer_index(self.frame.columns):
208201
col = self.frame.columns[key]
209-
210202
return self.frame[col]
211203

212204
def _fancy_getitem(self, key, axis=0):
205+
from pandas.core.series import Series
206+
213207
labels = self.frame._get_axis(axis)
214208
axis_name = self.frame._get_axis_name(axis)
215209

216-
keyarr = np.asarray(key)
217-
218210
# asarray can be unsafe, NumPy strings are weird
219-
isbool = keyarr.dtype == np.bool_
220-
if isbool:
211+
keyarr = _asarray_tuplesafe(key)
212+
213+
if keyarr.dtype == np.bool_:
221214
if isinstance(key, Series):
222215
if not key.index.equals(labels):
223216
raise Exception('Cannot use boolean index with misaligned '
224217
'or unequal labels')
225218
return self.frame.reindex(**{axis_name : labels[np.asarray(key)]})
226219
else:
227-
if _is_integer_dtype(keyarr) and _is_integer_index(labels):
228-
raise AmbiguousIndexError('integer labels')
220+
if _is_integer_dtype(keyarr) and not _is_integer_index(labels):
221+
key = labels.take(keyarr)
229222

230223
return self.frame.reindex(**{axis_name : key})
231224

@@ -298,4 +291,24 @@ def crit(x):
298291
def _need_slice(obj):
299292
return obj.start is not None or obj.stop is not None
300293

294+
def _maybe_droplevels(index, key):
    """
    Drop leading levels of ``index`` according to ``key``.

    A tuple key drops one leading level per element of the tuple; any other
    key drops exactly one leading level. Each drop is performed with
    ``index.droplevel(0)`` and the final index is returned.
    """
    levels_to_drop = len(key) if isinstance(key, tuple) else 1
    for _ in range(levels_to_drop):
        index = index.droplevel(0)
    return index
303+
304+
def _asarray_tuplesafe(values):
    """
    Convert ``values`` to an ndarray without letting it become 2-D.

    ``np.asarray(values)`` is returned as-is unless it is 2-dimensional
    (e.g. a list of same-length tuples). In that case a 1-D object array
    of length ``len(values)`` is allocated and filled from ``values`` so
    that each element of the input stays a single entry.
    """
    converted = np.asarray(values)
    if converted.ndim != 2:
        return converted

    boxed = np.empty(len(values), dtype=object)
    boxed[:] = values
    return boxed
312+
301313
# True when ``np.asarray(x)`` has boolean dtype.
_isboolarr = lambda x: np.asarray(x).dtype == np.bool_
314+

0 commit comments

Comments
 (0)