diff --git a/doc/source/release.rst b/doc/source/release.rst index 9a34cdbdfb5a8..1e0c980ca752d 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -327,6 +327,7 @@ See :ref:`Internal Refactoring` - Bug with Series indexing not raising an error when the right-hand-side has an incorrect length (:issue:`2702`) - Bug in multi-indexing with a partial string selection as one part of a MultIndex (:issue:`4758`) - Bug with reindexing on the index with a non-unique index will now raise ``ValueError`` (:issue:`4746`) + - Bug in setting with ``loc/ix`` a single indexer with a multi-index axis and a numpy array, related to (:issue:`3777`) pandas 0.12 =========== diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 19eeecfeb2bde..72196fcdad38d 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -163,6 +163,10 @@ def _setitem_with_indexer(self, indexer, value): labels = _safe_append_to_index(index, key) self.obj._data = self.obj.reindex_axis(labels,i)._data + if isinstance(labels,MultiIndex): + self.obj.sortlevel(inplace=True) + labels = self.obj._get_axis(i) + nindexer.append(labels.get_loc(key)) else: @@ -198,33 +202,77 @@ def _setitem_with_indexer(self, indexer, value): elif self.ndim >= 3: return self.obj.__setitem__(indexer,value) + # set + info_axis = self.obj._info_axis_number + item_labels = self.obj._get_axis(info_axis) + + # if we have a complicated setup, take the split path + if isinstance(indexer, tuple) and any([ isinstance(ax,MultiIndex) for ax in self.obj.axes ]): + take_split_path = True + # align and set the values if take_split_path: + if not isinstance(indexer, tuple): indexer = self._tuplify(indexer) if isinstance(value, ABCSeries): value = self._align_series(indexer, value) - info_axis = self.obj._info_axis_number info_idx = indexer[info_axis] - if com.is_integer(info_idx): info_idx = [info_idx] + labels = item_labels[info_idx] + + # if we have a partial multiindex, then need to adjust the plane indexer 
here + if len(labels) == 1 and isinstance(self.obj[labels[0]].index,MultiIndex): + index = self.obj[labels[0]].index + idx = indexer[:info_axis][0] + try: + if idx in index: + idx = index.get_loc(idx) + except: + pass + plane_indexer = tuple([idx]) + indexer[info_axis + 1:] + lplane_indexer = _length_of_indexer(plane_indexer[0],index) - plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:] - item_labels = self.obj._get_axis(info_axis) + if is_list_like(value) and lplane_indexer != len(value): + raise ValueError("cannot set using a multi-index selection indexer with a different length than the value") + + # non-mi + else: + plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:] + if info_axis > 0: + plane_axis = self.obj.axes[:info_axis][0] + lplane_indexer = _length_of_indexer(plane_indexer[0],plane_axis) + else: + lplane_indexer = 0 def setter(item, v): s = self.obj[item] - pi = plane_indexer[0] if len(plane_indexer) == 1 else plane_indexer + pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer # set the item, possibly having a dtype change s = s.copy() s._data = s._data.setitem(pi,v) self.obj[item] = s - labels = item_labels[info_idx] + def can_do_equal_len(): + """ return True if we have an equal len settable """ + if not len(labels) == 1: + return False + + l = len(value) + item = labels[0] + index = self.obj[item].index + + # equal len list/ndarray + if len(index) == l: + return True + elif lplane_indexer == l: + return True + + return False if _is_list_like(value): @@ -251,8 +299,7 @@ def setter(item, v): setter(item, value[:,i]) # we have an equal len list/ndarray - elif len(labels) == 1 and ( - len(self.obj[labels[0]]) == len(value) or len(plane_indexer[0]) == len(value)): + elif can_do_equal_len(): setter(labels[0], value) # per label values @@ -1104,6 +1151,31 @@ def _convert_key(self, key): # 32-bit floating point machine epsilon _eps = np.finfo('f4').eps +def _length_of_indexer(indexer,target=None): + """ return the length of a 
single non-tuple indexer which could be a slice """ + if target is not None and isinstance(indexer, slice): + l = len(target) + start = indexer.start + stop = indexer.stop + step = indexer.step + if start is None: + start = 0 + elif start < 0: + start += l + if stop is None or stop > l: + stop = l + elif stop < 0: + stop += l + if step is None: + step = 1 + elif step < 0: + step = abs(step) + return (stop-start) / step + elif isinstance(indexer, (ABCSeries, np.ndarray, list)): + return len(indexer) + elif not is_list_like(indexer): + return 1 + raise AssertionError("cannot find the length of the indexer") def _convert_to_index_sliceable(obj, key): """ if we are index sliceable, then return my slicer, otherwise return None """ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 91fdc712fb9b8..57db36b252e3c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -12,7 +12,8 @@ is_list_like, _infer_dtype_from_scalar) from pandas.core.index import (Index, MultiIndex, _ensure_index, _handle_legacy_indexes) -from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices +from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices, + _length_of_indexer) import pandas.core.common as com from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib @@ -563,22 +564,7 @@ def setitem(self, indexer, value): elif isinstance(indexer, slice): if is_list_like(value) and l: - start = indexer.start - stop = indexer.stop - step = indexer.step - if start is None: - start = 0 - elif start < 0: - start += l - if stop is None or stop > l: - stop = len(values) - elif stop < 0: - stop += l - if step is None: - step = 1 - elif step < 0: - step = abs(step) - if (stop-start) / step != len(value): + if len(value) != _length_of_indexer(indexer, values): raise ValueError("cannot set using a slice indexer with a different length than the value") try: diff --git a/pandas/tests/test_indexing.py 
def test_multiindex_assignment(self):
    """Setting via ``.ix`` with one indexer on a MultiIndex axis (GH3777 part 2)."""

    def build_frame():
        # 3x3 random integer frame on a two-level row index
        data = np.random.randint(5, 10, size=9).reshape(3, 3)
        return DataFrame(data,
                         columns=list('abc'),
                         index=[[4, 4, 8], [8, 10, 12]])

    arr = np.array([0., 1.])
    sub_index = [8, 10]

    # mixed dtype: the float column 'd' sits next to the integer columns
    df = build_frame()
    df['d'] = np.nan
    df.ix[4, 'd'] = arr
    expected = Series(arr, index=sub_index, name='d')
    assert_series_equal(df.ix[4, 'd'], expected)

    # single dtype: every column is integer
    df = build_frame()
    df.ix[4, 'c'] = arr
    expected = Series(arr, index=sub_index, name='c', dtype='int64')
    assert_series_equal(df.ix[4, 'c'], expected)

    # a scalar is accepted as the value
    df.ix[4, 'c'] = 10
    expected = Series(10, index=sub_index, name='c', dtype='int64')
    assert_series_equal(df.ix[4, 'c'], expected)

    # values whose length differs from the selection must be rejected
    def set_too_long():
        df.ix[4, 'c'] = [0, 1, 2, 3]
    self.assertRaises(ValueError, set_too_long)

    def set_too_short():
        df.ix[4, 'c'] = [0]
    self.assertRaises(ValueError, set_too_short)

    # groupby example
    NUM_ROWS = 100
    NUM_COLS = 10
    col_names = ['A' + str(num) for num in np.arange(NUM_COLS).tolist()]
    index_cols = col_names[:5]
    raw = np.random.randint(5, size=(NUM_ROWS, NUM_COLS))
    df = DataFrame(raw, dtype=np.int64, columns=col_names)
    df = df.set_index(index_cols).sort_index()
    grp = df.groupby(level=index_cols[:4])
    df['new_col'] = np.nan

    f_index = np.arange(5)

    def numbered(name, df2):
        counter = Series(np.arange(df2.shape[0]), name=df2.index.values[0])
        return counter.reindex(f_index)

    # built but not asserted on; only checks that the concat runs
    pieces = [numbered(name, df2) for name, df2 in grp]
    new_df = pd.concat(pieces, axis=1).T

    # assigning one array per group key must not raise
    for name, df2 in grp:
        new_vals = np.arange(df2.shape[0])
        df.ix[name, 'new_col'] = new_vals