diff --git a/doc/source/release.rst b/doc/source/release.rst index dc82550be6500..3a22de3cb43f3 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -826,6 +826,7 @@ Bug Fixes - Bug in repeated indexing of object with resultant non-unique index (:issue:`5678`) - Bug in fillna with Series and a passed series/dict (:issue:`5703`) - Bug in groupby transform with a datetime-like grouper (:issue:`5712`) + - Bug in multi-index selection in PY3 when using certain keys (:issue:`5725`) pandas 0.12.0 ------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b77ea2b22f4fa..e07655b0539a5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -721,13 +721,13 @@ def __setstate__(self, state): # to avoid definitional recursion # e.g. say fill_value needing _data to be # defined - for k in self._internal_names: + for k in self._internal_names_set: if k in state: v = state[k] object.__setattr__(self, k, v) for k, v in state.items(): - if k not in self._internal_names: + if k not in self._internal_names_set: object.__setattr__(self, k, v) else: @@ -938,15 +938,22 @@ def to_clipboard(self, excel=None, sep=None, **kwargs): @classmethod def _create_indexer(cls, name, indexer): """ create an indexer like _name in the class """ - iname = '_%s' % name - setattr(cls, iname, None) - def _indexer(self): - if getattr(self, iname, None) is None: - setattr(self, iname, indexer(self, name)) - return getattr(self, iname) + if getattr(cls, name, None) is None: + iname = '_%s' % name + setattr(cls, iname, None) - setattr(cls, name, property(_indexer)) + def _indexer(self): + i = getattr(self, iname) + if i is None: + i = indexer(self, name) + setattr(self, iname, i) + return i + + setattr(cls, name, property(_indexer)) + + # add to our internal names set + cls._internal_names_set.add(iname) def get(self, key, default=None): """ @@ -1831,9 +1838,9 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, pad / ffill: propagate last valid 
observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap value : scalar, dict, or Series - Value to use to fill holes (e.g. 0), alternately a dict/Series of - values specifying which value to use for each index (for a Series) or - column (for a DataFrame). (values not in the dict/Series will not be + Value to use to fill holes (e.g. 0), alternately a dict/Series of + values specifying which value to use for each index (for a Series) or + column (for a DataFrame). (values not in the dict/Series will not be filled). This value cannot be a list. axis : {0, 1}, default 0 0: fill column-by-column @@ -1845,8 +1852,8 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, limit : int, default None Maximum size gap to forward or backward fill downcast : dict, default is None - a dict of item->dtype of what to downcast if possible, - or the string 'infer' which will try to downcast to an appropriate + a dict of item->dtype of what to downcast if possible, + or the string 'infer' which will try to downcast to an appropriate equal type (e.g. float64 to int64 if possible) See also diff --git a/pandas/core/index.py b/pandas/core/index.py index 7ae273d08fa87..30f93564db318 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2300,8 +2300,14 @@ def _try_mi(k): # a Timestamp will raise a TypeError in a multi-index # rather than a KeyError, try it here + # note that a string that 'looks' like a Timestamp will raise + # a KeyError! 
(GH5725) if isinstance(key, (datetime.datetime, np.datetime64)) or ( compat.PY3 and isinstance(key, compat.string_types)): + try: + return _try_mi(key) + except: + pass try: return _try_mi(Timestamp(key)) except: diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 4954decd5195b..e601755ba8aaf 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -3,7 +3,7 @@ import itertools import warnings -from pandas.compat import range, lrange, StringIO, lmap, map +from pandas.compat import range, lrange, lzip, StringIO, lmap, map from numpy import random, nan from numpy.random import randn import numpy as np @@ -249,6 +249,15 @@ def _print(result, error = None): k2 = key2 _eq(t, o, a, obj, key1, k2) + def test_indexer_caching(self): + # GH5727 + # make sure that indexers are in the _internal_names_set + n = 1000001 + arrays = [lrange(n), lrange(n)] + index = MultiIndex.from_tuples(lzip(*arrays)) + s = Series(np.zeros(n), index=index) + str(s) + def test_at_and_iat_get(self): def _check(f, func, values = False): @@ -830,6 +839,20 @@ def test_xs_multiindex(self): expected.columns = expected.columns.droplevel('lvl1') assert_frame_equal(result, expected) + def test_getitem_multiindex(self): + + # GH 5725 + # the 'A' happens to be a valid Timestamp so it doesn't raise the appropriate + # error, only in PY3 of course! + index = MultiIndex(levels=[['A', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + arr = np.random.randn(len(index),1) + df = DataFrame(arr,index=index,columns=['val']) + result = df.val['A'] + expected = Series(arr.ravel()[0:3],name='val',index=Index([26,37,57],name='day')) + assert_series_equal(result,expected) + def test_setitem_dtype_upcast(self): # GH3216