From f2f9f2316bbc2bdda0f96e9452636b664a98664a Mon Sep 17 00:00:00 2001 From: behzad nouri Date: Wed, 14 Jan 2015 19:25:23 -0500 Subject: [PATCH] bug in multi-index where insert fails --- doc/source/whatsnew/v0.16.0.txt | 1 + pandas/core/indexing.py | 51 ++++++++++++++------------------- pandas/tests/test_frame.py | 4 +-- pandas/tests/test_index.py | 50 ++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 5ec0e90383f4c..cd3a3f6e38d98 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -103,6 +103,7 @@ Bug Fixes - Bug in ``MultiIndex.has_duplicates`` when having many levels causes an indexer overflow (:issue:`9075`, :issue:`5873`) - Bug in ``pivot`` and `unstack`` where ``nan`` values would break index alignment (:issue:`7466`) - Bug in left ``join`` on multi-index with ``sort=True`` or null values (:issue:`9210`). +- Bug in ``MultiIndex`` where inserting new keys would fail (:issue:`9250`). 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 7202ed64e1c9c..e305eb828f410 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -93,31 +93,27 @@ def _get_loc(self, key, axis=0): def _slice(self, obj, axis=0, typ=None): return self.obj._slice(obj, axis=axis, typ=typ) - def __setitem__(self, key, value): - + def _get_setitem_indexer(self, key): if self.axis is not None: - indexer = self._convert_tuple(key, is_setter=True) + return self._convert_tuple(key, is_setter=True) - else: + axis = self.obj._get_axis(0) + if isinstance(axis, MultiIndex): + try: + return axis.get_loc(key) + except Exception: + pass - # kludgetastic - ax = self.obj._get_axis(0) - if isinstance(ax, MultiIndex): - try: - indexer = ax.get_loc(key) - self._setitem_with_indexer(indexer, value) - return - except Exception: - pass + if isinstance(key, tuple) and not self.ndim < len(key): + return self._convert_tuple(key, is_setter=True) - if isinstance(key, tuple): - if len(key) > self.ndim: - raise IndexingError('only tuples of length <= %d supported' % - self.ndim) - indexer = self._convert_tuple(key, is_setter=True) - else: - indexer = self._convert_to_indexer(key, is_setter=True) + try: + return self._convert_to_indexer(key, is_setter=True) + except TypeError: + raise IndexingError(key) + def __setitem__(self, key, value): + indexer = self._get_setitem_indexer(key) self._setitem_with_indexer(indexer, value) def _has_valid_type(self, k, axis): @@ -259,10 +255,6 @@ def _setitem_with_indexer(self, indexer, value): self.obj._maybe_update_cacher(clear=True) self.obj.is_copy=None - if isinstance(labels, MultiIndex): - self.obj.sortlevel(inplace=True) - labels = self.obj._get_axis(i) - nindexer.append(labels.get_loc(key)) else: @@ -1064,7 +1056,12 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): # if we are a label return me try: return labels.get_loc(obj) - except (KeyError, TypeError): + except KeyError: + if isinstance(obj, tuple) and 
isinstance(labels, MultiIndex): + if is_setter and len(obj) == labels.nlevels: + return {'key': obj} + raise + except TypeError: pass except (ValueError): if not is_int_positional: @@ -1136,10 +1133,6 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): mask = check == -1 if mask.any(): - - # mi here - if isinstance(obj, tuple) and is_setter: - return {'key': obj} raise KeyError('%s not in index' % objarr[mask]) return _values_from_object(indexer) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index fcbfb21bd20e3..129bddea6eed5 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1351,8 +1351,8 @@ def test_getitem_setitem_fancy_exceptions(self): ix = self.frame.ix with assertRaisesRegexp(IndexingError, 'Too many indexers'): ix[:, :, :] - with assertRaisesRegexp(IndexingError, 'only tuples of length <= 2 ' - 'supported'): + + with assertRaises(IndexingError): ix[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index d474981771015..ef41748e2cda9 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -3354,6 +3354,56 @@ def test_insert(self): assertRaisesRegexp(ValueError, "Item must have length equal to number" " of levels", self.index.insert, 0, ('foo2',)) + left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], + columns=['1st', '2nd', '3rd']) + left.set_index(['1st', '2nd'], inplace=True) + ts = left['3rd'].copy(deep=True) + + left.loc[('b', 'x'), '3rd'] = 2 + left.loc[('b', 'a'), '3rd'] = -1 + left.loc[('b', 'b'), '3rd'] = 3 + left.loc[('a', 'x'), '3rd'] = 4 + left.loc[('a', 'w'), '3rd'] = 5 + left.loc[('a', 'a'), '3rd'] = 6 + + ts.loc[('b', 'x')] = 2 + ts.loc['b', 'a'] = -1 + ts.loc[('b', 'b')] = 3 + ts.loc['a', 'x'] = 4 + ts.loc[('a', 'w')] = 5 + ts.loc['a', 'a'] = 6 + + right = pd.DataFrame([['a', 'b', 0], + ['b', 'd', 1], + ['b', 'x', 2], + ['b', 'a', -1], + ['b', 'b', 3], + ['a', 'x', 4], + ['a', 'w', 
5], + ['a', 'a', 6]], + columns=['1st', '2nd', '3rd']) + right.set_index(['1st', '2nd'], inplace=True) + # FIXME: the dtype changes to float because + # of the intermediate NaN insertion + tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right['3rd']) + + # GH9250 + idx = [('test1', i) for i in range(5)] + \ + [('test2', i) for i in range(6)] + \ + [('test', 17), ('test', 18)] + + left = pd.Series(np.linspace(0, 10, 11), + pd.MultiIndex.from_tuples(idx[:-2])) + + left.loc[('test', 17)] = 11 + left.ix[('test', 18)] = 12 + + right = pd.Series(np.linspace(0, 12, 13), + pd.MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + def test_take_preserve_name(self): taken = self.index.take([3, 0, 1]) self.assertEqual(taken.names, self.index.names)