
BUG: Bug in setting with loc/ix a single indexer on a multi-index axis and a listlike (related to GH3777) #4766

Merged: 1 commit, Sep 7, 2013
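For context, the behaviour this PR addresses can be reproduced along the lines of the new test_multiindex_assignment test added below: setting a list-like value through a single, partial indexer on a MultiIndex axis. A minimal sketch (using the era-appropriate ix indexer, as the tests do):

import numpy as np
import pandas as pd

# Frame with a two-level row index; the outer label 4 covers two rows.
df = pd.DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
                  columns=list('abc'),
                  index=[[4, 4, 8], [8, 10, 12]])
df['d'] = np.nan

# Assigning a 2-element array through the partial indexer 4 places one
# value on each of the rows (4, 8) and (4, 10).
df.ix[4, 'd'] = np.array([0., 1.])

# A length mismatch is rejected with a ValueError (see the new check below).
# df.ix[4, 'd'] = [0., 1., 2.]   # ValueError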
1 change: 1 addition & 0 deletions doc/source/release.rst
@@ -327,6 +327,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
- Bug with Series indexing not raising an error when the right-hand-side has an incorrect length (:issue:`2702`)
- Bug in multi-indexing with a partial string selection as one part of a MultIndex (:issue:`4758`)
- Bug with reindexing on the index with a non-unique index will now raise ``ValueError`` (:issue:`4746`)
- Bug in setting with ``loc/ix`` a single indexer with a multi-index axis and a numpy array, related to (:issue:`3777`)

pandas 0.12
===========
88 changes: 80 additions & 8 deletions pandas/core/indexing.py
@@ -163,6 +163,10 @@ def _setitem_with_indexer(self, indexer, value):
labels = _safe_append_to_index(index, key)
self.obj._data = self.obj.reindex_axis(labels,i)._data

if isinstance(labels,MultiIndex):
self.obj.sortlevel(inplace=True)
labels = self.obj._get_axis(i)

nindexer.append(labels.get_loc(key))

else:
@@ -198,33 +202,77 @@ def _setitem_with_indexer(self, indexer, value):
elif self.ndim >= 3:
return self.obj.__setitem__(indexer,value)

# set
info_axis = self.obj._info_axis_number
item_labels = self.obj._get_axis(info_axis)

# if we have a complicated setup, take the split path
if isinstance(indexer, tuple) and any([ isinstance(ax,MultiIndex) for ax in self.obj.axes ]):
take_split_path = True

# align and set the values
if take_split_path:

if not isinstance(indexer, tuple):
indexer = self._tuplify(indexer)

if isinstance(value, ABCSeries):
value = self._align_series(indexer, value)

info_axis = self.obj._info_axis_number
info_idx = indexer[info_axis]

if com.is_integer(info_idx):
info_idx = [info_idx]
labels = item_labels[info_idx]

# if we have a partial multiindex, then need to adjust the plane indexer here
if len(labels) == 1 and isinstance(self.obj[labels[0]].index,MultiIndex):
index = self.obj[labels[0]].index
idx = indexer[:info_axis][0]
try:
if idx in index:
idx = index.get_loc(idx)
except:
pass
plane_indexer = tuple([idx]) + indexer[info_axis + 1:]
lplane_indexer = _length_of_indexer(plane_indexer[0],index)

plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
item_labels = self.obj._get_axis(info_axis)
if is_list_like(value) and lplane_indexer != len(value):
raise ValueError("cannot set using a multi-index selection indexer with a different length than the value")

# non-mi
else:
plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
if info_axis > 0:
plane_axis = self.obj.axes[:info_axis][0]
lplane_indexer = _length_of_indexer(plane_indexer[0],plane_axis)
else:
lplane_indexer = 0

def setter(item, v):
s = self.obj[item]
pi = plane_indexer[0] if len(plane_indexer) == 1 else plane_indexer
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer

# set the item, possibly having a dtype change
s = s.copy()
s._data = s._data.setitem(pi,v)
self.obj[item] = s

labels = item_labels[info_idx]
def can_do_equal_len():
""" return True if we have an equal len settable """
if not len(labels) == 1:
return False

l = len(value)
item = labels[0]
index = self.obj[item].index

# equal len list/ndarray
if len(index) == l:
return True
elif lplane_indexer == l:
return True

return False

if _is_list_like(value):

@@ -251,8 +299,7 @@ def setter(item, v):
setter(item, value[:,i])

# we have an equal len list/ndarray
elif len(labels) == 1 and (
len(self.obj[labels[0]]) == len(value) or len(plane_indexer[0]) == len(value)):
elif can_do_equal_len():
setter(labels[0], value)

# per label values
@@ -1104,6 +1151,31 @@ def _convert_key(self, key):
# 32-bit floating point machine epsilon
_eps = np.finfo('f4').eps

def _length_of_indexer(indexer,target=None):
""" return the length of a single non-tuple indexer which could be a slice """
if target is not None and isinstance(indexer, slice):
l = len(target)
start = indexer.start
stop = indexer.stop
step = indexer.step
if start is None:
start = 0
elif start < 0:
start += l
if stop is None or stop > l:
stop = l
elif stop < 0:
stop += l
if step is None:
step = 1
elif step < 0:
step = abs(step)
return (stop-start) / step
elif isinstance(indexer, (ABCSeries, np.ndarray, list)):
return len(indexer)
elif not is_list_like(indexer):
return 1
raise AssertionError("cannot find the length of the indexer")

def _convert_to_index_sliceable(obj, key):
""" if we are index sliceable, then return my slicer, otherwise return None """
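As a rough usage sketch of the _length_of_indexer helper introduced above (a private helper, so the import path below is only meaningful for this era of the codebase), these are the lengths it reports for a few representative indexers:

import numpy as np
from pandas.core.indexing import _length_of_indexer  # private helper added above

target = np.arange(10)
_length_of_indexer(slice(2, 8), target)   # 6: positions 2..7 of the target
_length_of_indexer(slice(None), target)   # 10: the whole axis
_length_of_indexer([1, 3, 5])             # 3: length of the list-like
_length_of_indexer(7)                     # 1: a scalar indexer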
20 changes: 3 additions & 17 deletions pandas/core/internals.py
@@ -12,7 +12,8 @@
is_list_like, _infer_dtype_from_scalar)
from pandas.core.index import (Index, MultiIndex, _ensure_index,
_handle_legacy_indexes)
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices,
_length_of_indexer)
import pandas.core.common as com
from pandas.sparse.array import _maybe_to_sparse, SparseArray
import pandas.lib as lib
@@ -563,22 +564,7 @@ def setitem(self, indexer, value):
elif isinstance(indexer, slice):

if is_list_like(value) and l:
start = indexer.start
stop = indexer.stop
step = indexer.step
if start is None:
start = 0
elif start < 0:
start += l
if stop is None or stop > l:
stop = len(values)
elif stop < 0:
stop += l
if step is None:
step = 1
elif step < 0:
step = abs(step)
if (stop-start) / step != len(value):
if len(value) != _length_of_indexer(indexer, values):
raise ValueError("cannot set using a slice indexer with a different length than the value")

try:
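The change above only swaps the inline slice arithmetic for the shared _length_of_indexer helper; the user-visible check is unchanged. A small illustrative sketch of the mismatch it rejects:

import numpy as np
import pandas as pd

s = pd.Series(np.arange(5))
s.iloc[1:3] = [10, 20]           # fine: the slice selects two positions

try:
    s.iloc[1:3] = [10, 20, 30]   # three values for a two-position slice
except ValueError as err:
    print(err)   # cannot set using a slice indexer with a different length than the value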
54 changes: 54 additions & 0 deletions pandas/tests/test_indexing.py
@@ -917,6 +917,60 @@ def f():
#result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
#tm.assert_panel_equal(result,expected)

def test_multiindex_assignment(self):

# GH3777 part 2

# mixed dtype
df = DataFrame(np.random.randint(5,10,size=9).reshape(3, 3),
columns=list('abc'),
index=[[4,4,8],[8,10,12]])
df['d'] = np.nan
arr = np.array([0.,1.])

df.ix[4,'d'] = arr
assert_series_equal(df.ix[4,'d'],Series(arr,index=[8,10],name='d'))

# single dtype
df = DataFrame(np.random.randint(5,10,size=9).reshape(3, 3),
columns=list('abc'),
index=[[4,4,8],[8,10,12]])

df.ix[4,'c'] = arr
assert_series_equal(df.ix[4,'c'],Series(arr,index=[8,10],name='c',dtype='int64'))

# scalar ok
df.ix[4,'c'] = 10
assert_series_equal(df.ix[4,'c'],Series(10,index=[8,10],name='c',dtype='int64'))

# invalid assignments
def f():
df.ix[4,'c'] = [0,1,2,3]
self.assertRaises(ValueError, f)

def f():
df.ix[4,'c'] = [0]
self.assertRaises(ValueError, f)

# groupby example
NUM_ROWS = 100
NUM_COLS = 10
col_names = ['A'+num for num in map(str,np.arange(NUM_COLS).tolist())]
index_cols = col_names[:5]
df = DataFrame(np.random.randint(5, size=(NUM_ROWS,NUM_COLS)), dtype=np.int64, columns=col_names)
df = df.set_index(index_cols).sort_index()
grp = df.groupby(level=index_cols[:4])
df['new_col'] = np.nan

f_index = np.arange(5)
def f(name,df2):
return Series(np.arange(df2.shape[0]),name=df2.index.values[0]).reindex(f_index)
new_df = pd.concat([ f(name,df2) for name, df2 in grp ],axis=1).T

for name, df2 in grp:
new_vals = np.arange(df2.shape[0])
df.ix[name, 'new_col'] = new_vals

def test_multi_assign(self):

# GH 3626, an assignment of a sub-df to a df
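The groupby block in test_multiindex_assignment exercises the same fix in a loop: each group key acts as a partial index label, and the assigned array must match the group length. A distilled sketch of that pattern (smaller frame, a single grouping level, hypothetical column names):

import numpy as np
import pandas as pd

df = pd.DataFrame({'k1': [1, 1, 2, 2, 2],
                   'k2': list('ababc'),
                   'v':  np.arange(5)}).set_index(['k1', 'k2']).sort_index()
df['new_col'] = np.nan

for name, grp in df.groupby(level='k1'):
    # one value per row of the group, set via the partial index label
    df.ix[name, 'new_col'] = np.arange(len(grp))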