Skip to content

Commit 22f04f7

Browse files
committed
Merge pull request #4766 from jreback/ix_assign
BUG: Bug in setting with loc/ix a single indexer on a multi-index axis and a listlike (related to GH3777)
2 parents 84ca068 + b057202 commit 22f04f7

File tree

4 files changed

+138
-25
lines changed

4 files changed

+138
-25
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
329329
- Bug with Series indexing not raising an error when the right-hand-side has an incorrect length (:issue:`2702`)
330330
- Bug in multi-indexing with a partial string selection as one part of a MultiIndex (:issue:`4758`)
331331
- Bug with reindexing on the index with a non-unique index will now raise ``ValueError`` (:issue:`4746`)
332+
- Bug in setting with ``loc/ix`` when using a single indexer on a multi-index axis and a numpy array as the value, related to (:issue:`3777`)
332333

333334
pandas 0.12
334335
===========

pandas/core/indexing.py

+80-8
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ def _setitem_with_indexer(self, indexer, value):
163163
labels = _safe_append_to_index(index, key)
164164
self.obj._data = self.obj.reindex_axis(labels,i)._data
165165

166+
if isinstance(labels,MultiIndex):
167+
self.obj.sortlevel(inplace=True)
168+
labels = self.obj._get_axis(i)
169+
166170
nindexer.append(labels.get_loc(key))
167171

168172
else:
@@ -198,33 +202,77 @@ def _setitem_with_indexer(self, indexer, value):
198202
elif self.ndim >= 3:
199203
return self.obj.__setitem__(indexer,value)
200204

205+
# set
206+
info_axis = self.obj._info_axis_number
207+
item_labels = self.obj._get_axis(info_axis)
208+
209+
# if we have a complicated setup, take the split path
210+
if isinstance(indexer, tuple) and any([ isinstance(ax,MultiIndex) for ax in self.obj.axes ]):
211+
take_split_path = True
212+
201213
# align and set the values
202214
if take_split_path:
215+
203216
if not isinstance(indexer, tuple):
204217
indexer = self._tuplify(indexer)
205218

206219
if isinstance(value, ABCSeries):
207220
value = self._align_series(indexer, value)
208221

209-
info_axis = self.obj._info_axis_number
210222
info_idx = indexer[info_axis]
211-
212223
if com.is_integer(info_idx):
213224
info_idx = [info_idx]
225+
labels = item_labels[info_idx]
226+
227+
# if we have a partial multiindex, then need to adjust the plane indexer here
228+
if len(labels) == 1 and isinstance(self.obj[labels[0]].index,MultiIndex):
229+
index = self.obj[labels[0]].index
230+
idx = indexer[:info_axis][0]
231+
try:
232+
if idx in index:
233+
idx = index.get_loc(idx)
234+
except:
235+
pass
236+
plane_indexer = tuple([idx]) + indexer[info_axis + 1:]
237+
lplane_indexer = _length_of_indexer(plane_indexer[0],index)
214238

215-
plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
216-
item_labels = self.obj._get_axis(info_axis)
239+
if is_list_like(value) and lplane_indexer != len(value):
240+
raise ValueError("cannot set using a multi-index selection indexer with a different length than the value")
241+
242+
# non-mi
243+
else:
244+
plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
245+
if info_axis > 0:
246+
plane_axis = self.obj.axes[:info_axis][0]
247+
lplane_indexer = _length_of_indexer(plane_indexer[0],plane_axis)
248+
else:
249+
lplane_indexer = 0
217250

218251
def setter(item, v):
219252
s = self.obj[item]
220-
pi = plane_indexer[0] if len(plane_indexer) == 1 else plane_indexer
253+
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
221254

222255
# set the item, possibly having a dtype change
223256
s = s.copy()
224257
s._data = s._data.setitem(pi,v)
225258
self.obj[item] = s
226259

227-
labels = item_labels[info_idx]
260+
def can_do_equal_len():
261+
""" return True if we have an equal len settable """
262+
if not len(labels) == 1:
263+
return False
264+
265+
l = len(value)
266+
item = labels[0]
267+
index = self.obj[item].index
268+
269+
# equal len list/ndarray
270+
if len(index) == l:
271+
return True
272+
elif lplane_indexer == l:
273+
return True
274+
275+
return False
228276

229277
if _is_list_like(value):
230278

@@ -251,8 +299,7 @@ def setter(item, v):
251299
setter(item, value[:,i])
252300

253301
# we have an equal len list/ndarray
254-
elif len(labels) == 1 and (
255-
len(self.obj[labels[0]]) == len(value) or len(plane_indexer[0]) == len(value)):
302+
elif can_do_equal_len():
256303
setter(labels[0], value)
257304

258305
# per label values
@@ -1104,6 +1151,31 @@ def _convert_key(self, key):
11041151
# 32-bit floating point machine epsilon
11051152
_eps = np.finfo('f4').eps
11061153

1154+
def _length_of_indexer(indexer,target=None):
1155+
""" return the length of a single non-tuple indexer which could be a slice """
1156+
if target is not None and isinstance(indexer, slice):
1157+
l = len(target)
1158+
start = indexer.start
1159+
stop = indexer.stop
1160+
step = indexer.step
1161+
if start is None:
1162+
start = 0
1163+
elif start < 0:
1164+
start += l
1165+
if stop is None or stop > l:
1166+
stop = l
1167+
elif stop < 0:
1168+
stop += l
1169+
if step is None:
1170+
step = 1
1171+
elif step < 0:
1172+
step = abs(step)
1173+
return (stop-start) / step
1174+
elif isinstance(indexer, (ABCSeries, np.ndarray, list)):
1175+
return len(indexer)
1176+
elif not is_list_like(indexer):
1177+
return 1
1178+
raise AssertionError("cannot find the length of the indexer")
11071179

11081180
def _convert_to_index_sliceable(obj, key):
11091181
""" if we are index sliceable, then return my slicer, otherwise return None """

pandas/core/internals.py

+3-17
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
is_list_like, _infer_dtype_from_scalar)
1313
from pandas.core.index import (Index, MultiIndex, _ensure_index,
1414
_handle_legacy_indexes)
15-
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
15+
from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices,
16+
_length_of_indexer)
1617
import pandas.core.common as com
1718
from pandas.sparse.array import _maybe_to_sparse, SparseArray
1819
import pandas.lib as lib
@@ -563,22 +564,7 @@ def setitem(self, indexer, value):
563564
elif isinstance(indexer, slice):
564565

565566
if is_list_like(value) and l:
566-
start = indexer.start
567-
stop = indexer.stop
568-
step = indexer.step
569-
if start is None:
570-
start = 0
571-
elif start < 0:
572-
start += l
573-
if stop is None or stop > l:
574-
stop = len(values)
575-
elif stop < 0:
576-
stop += l
577-
if step is None:
578-
step = 1
579-
elif step < 0:
580-
step = abs(step)
581-
if (stop-start) / step != len(value):
567+
if len(value) != _length_of_indexer(indexer, values):
582568
raise ValueError("cannot set using a slice indexer with a different length than the value")
583569

584570
try:

pandas/tests/test_indexing.py

+54
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,60 @@ def f():
917917
#result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
918918
#tm.assert_panel_equal(result,expected)
919919

920+
def test_multiindex_assignment(self):
    """Setting through ``.ix`` with a single indexer on a MultiIndex axis.

    GH3777 part 2: a partial (first-level) row key combined with a column
    label, assigned an ndarray, a scalar, and wrongly sized lists.
    """

    def make_frame():
        # 3x3 integer frame whose rows carry a two-level index; the
        # partial key 4 matches the first two rows (second level 8, 10)
        return DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
                         columns=list('abc'),
                         index=[[4, 4, 8], [8, 10, 12]])

    values = np.array([0., 1.])

    # mixed dtype: the frame gains a float column next to the int ones
    frame = make_frame()
    frame['d'] = np.nan
    frame.ix[4, 'd'] = values
    assert_series_equal(frame.ix[4, 'd'],
                        Series(values, index=[8, 10], name='d'))

    # single dtype: all columns are integer
    frame = make_frame()
    frame.ix[4, 'c'] = values
    assert_series_equal(frame.ix[4, 'c'],
                        Series(values, index=[8, 10], name='c', dtype='int64'))

    # scalar ok
    frame.ix[4, 'c'] = 10
    assert_series_equal(frame.ix[4, 'c'],
                        Series(10, index=[8, 10], name='c', dtype='int64'))

    # invalid assignments: value length differs from the two selected rows
    def assign(bad_value):
        frame.ix[4, 'c'] = bad_value

    for bad_value in ([0, 1, 2, 3], [0]):
        self.assertRaises(ValueError, assign, bad_value)

    # groupby example
    n_rows = 100
    n_cols = 10
    col_names = ['A' + str(i) for i in np.arange(n_cols).tolist()]
    index_cols = col_names[:5]
    frame = DataFrame(np.random.randint(5, size=(n_rows, n_cols)),
                      dtype=np.int64, columns=col_names)
    frame = frame.set_index(index_cols).sort_index()
    grouped = frame.groupby(level=index_cols[:4])
    frame['new_col'] = np.nan

    f_index = np.arange(5)

    def per_group_series(name, sub):
        positions = Series(np.arange(sub.shape[0]), name=sub.index.values[0])
        return positions.reindex(f_index)

    # built but never compared; kept so this code path is still exercised
    pieces = [per_group_series(name, sub) for name, sub in grouped]
    new_df = pd.concat(pieces, axis=1).T

    # smoke test: each per-group assignment only has to complete
    for name, sub in grouped:
        frame.ix[name, 'new_col'] = np.arange(sub.shape[0])
973+
920974
def test_multi_assign(self):
921975

922976
# GH 3626, an assignment of a sub-df to a df

0 commit comments

Comments
 (0)