Skip to content

Commit 747fee2

Browse files
committed
BUG: Fix iloc with duplicate labels
closes pandas-dev#15686
1 parent 929c66f commit 747fee2

File tree

2 files changed

+125
-33
lines changed

2 files changed

+125
-33
lines changed

pandas/core/dtypes/cast.py

+20
Original file line numberDiff line numberDiff line change
@@ -1085,3 +1085,23 @@ def cast_scalar_to_array(shape, value, dtype=None):
10851085
values.fill(fill_value)
10861086

10871087
return values
1088+
1089+
def _maybe_convert_indexer(indexer, until):
    """
    Convert slice, tuple, list or scalar "indexer" to 1-d array of indices.

    Parameters
    ----------
    indexer : scalar, slice, list-like or np.ndarray
        Positional indexer along a single axis. Boolean ndarrays and
        boolean list-likes are interpreted as masks.
    until : int
        Length of the axis being indexed. Slices are resolved against
        ``range(until)``.

    Returns
    -------
    np.ndarray
        1-d array of positions. A non-boolean ndarray input is returned
        unchanged.
    """
    if is_scalar(indexer):
        return np.array([indexer], dtype=int)

    if isinstance(indexer, slice):
        # Resolve the slice against the whole axis rather than against
        # ``indexer.stop``: a negative stop (e.g. ``slice(None, -1)``) would
        # otherwise produce an empty range, and a stop past the end of the
        # axis would produce out-of-bounds positions.
        return np.arange(until, dtype=int)[indexer]

    if isinstance(indexer, np.ndarray):
        if indexer.dtype == bool:
            return np.where(indexer)[0]
        return indexer

    # list / tuple: a boolean list-like is a mask, exactly like a boolean
    # ndarray; casting it to int would silently turn it into positions 0/1.
    values = np.asarray(indexer)
    if values.dtype == bool:
        return np.where(values)[0]
    return np.array(indexer, dtype=int)

pandas/core/indexing.py

100755100644
+105-33
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
_is_unorderable_exception,
1717
_ensure_platform_int)
1818
from pandas.core.dtypes.missing import isna, _infer_fill_value
19+
from pandas.core.dtypes.cast import _maybe_convert_indexer
1920

2021
from pandas.core.index import Index, MultiIndex
2122

@@ -81,6 +82,24 @@ def __getitem__(self, arg):
8182
IndexSlice = _IndexSlice()
8283

8384

85+
class InfoCleaner(object):
    """
    A context manager which temporarily removes labels on the "info" axis,
    replacing them with ``range(len(labels))``, and then puts them back in
    place.

    Used to unambiguously index by position.

    Parameters
    ----------
    obj : object
        Must expose ``_AXIS_NAMES`` and ``_info_axis_number``; the attribute
        named ``_AXIS_NAMES[_info_axis_number]`` holds the labels that are
        swapped out while the context is active.
    """

    def __init__(self, obj):
        self._obj = obj
        # Name of the attribute holding the info-axis labels.
        self._info_axis = self._obj._AXIS_NAMES[self._obj._info_axis_number]

    def __enter__(self):
        # Remember the real labels so that __exit__ can restore them.
        self._old_col = getattr(self._obj, self._info_axis)
        setattr(self._obj, self._info_axis, range(len(self._old_col)))
        # Return the manager so ``with InfoCleaner(obj) as cleaner`` binds
        # something useful instead of None.
        return self

    def __exit__(self, *args):
        # Restore unconditionally; nothing truthy is returned, so an
        # exception raised inside the ``with`` body keeps propagating.
        setattr(self._obj, self._info_axis, self._old_col)
101+
102+
84103
class IndexingError(Exception):
85104
pass
86105

@@ -492,29 +511,10 @@ def _setitem_with_indexer(self, indexer, value):
492511
else:
493512
lplane_indexer = 0
494513

495-
def setter(item, v):
496-
s = self.obj[item]
497-
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
498-
499-
# perform the equivalent of a setitem on the info axis
500-
# as we have a null slice or a slice with full bounds
501-
# which means essentially reassign to the columns of a
502-
# multi-dim object
503-
# GH6149 (null slice), GH10408 (full bounds)
504-
if (isinstance(pi, tuple) and
505-
all(is_null_slice(idx) or
506-
is_full_slice(idx, len(self.obj))
507-
for idx in pi)):
508-
s = v
509-
else:
510-
# set the item, possibly having a dtype change
511-
s._consolidate_inplace()
512-
s = s.copy()
513-
s._data = s._data.setitem(indexer=pi, value=v)
514-
s._maybe_update_cacher(clear=True)
515-
516-
# reset the sliced object if unique
517-
self.obj[item] = s
514+
setter_kwargs = {'items': labels,
515+
'indexer': indexer,
516+
'pi': plane_indexer[0] if lplane_indexer == 1
517+
else plane_indexer}
518518

519519
def can_do_equal_len():
520520
""" return True if we have an equal len settable """
@@ -542,7 +542,7 @@ def can_do_equal_len():
542542
sub_indexer = list(indexer)
543543
multiindex_indexer = isinstance(labels, MultiIndex)
544544

545-
for item in labels:
545+
for idx, item in enumerate(labels):
546546
if item in value:
547547
sub_indexer[info_axis] = item
548548
v = self._align_series(
@@ -551,7 +551,7 @@ def can_do_equal_len():
551551
else:
552552
v = np.nan
553553

554-
setter(item, v)
554+
self._setter(idx, v, force_loc=True, **setter_kwargs)
555555

556556
# we have an equal len ndarray/convertible to our labels
557557
elif np.array(value).ndim == 2:
@@ -563,14 +563,15 @@ def can_do_equal_len():
563563
raise ValueError('Must have equal len keys and value '
564564
'when setting with an ndarray')
565565

566-
for i, item in enumerate(labels):
566+
for i in range(len(labels)):
567567

568568
# setting with a list, recoerces
569-
setter(item, value[:, i].tolist())
569+
self._setter(i, value[:, i].tolist(), force_loc=True,
570+
**setter_kwargs)
570571

571572
# we have an equal len list/ndarray
572573
elif can_do_equal_len():
573-
setter(labels[0], value)
574+
self._setter(0, value, **setter_kwargs)
574575

575576
# per label values
576577
else:
@@ -579,13 +580,12 @@ def can_do_equal_len():
579580
raise ValueError('Must have equal len keys and value '
580581
'when setting with an iterable')
581582

582-
for item, v in zip(labels, value):
583-
setter(item, v)
583+
for i, v in zip(range(len(labels)), value):
584+
self._setter(i, v, **setter_kwargs)
584585
else:
585-
586586
# scalar
587-
for item in labels:
588-
setter(item, value)
587+
for idx in range(len(labels)):
588+
self._setter(idx, value, **setter_kwargs)
589589

590590
else:
591591
if isinstance(indexer, tuple):
@@ -619,6 +619,47 @@ def can_do_equal_len():
619619
value=value)
620620
self.obj._maybe_update_cacher(clear=True)
621621

622+
def _setter(self, idx, v, items, pi, **kwargs):
    """
    Assign ``v`` to the entry of ``self.obj`` keyed by ``items[idx]``.
    Label-based.

    Parameters
    ----------
    idx : int
        Position of the wanted key inside ``items``.
    v : any
        The value to assign.
    items : list-like
        Keys on the info axis; ``items[idx]`` is the one written to.
    pi : tuple or list-like
        Components of the original indexer preceding the info axis.
    **kwargs
        Accepted and ignored, so callers can pass one shared set of
        keyword arguments to every ``_setter`` implementation.
    """
    label = items[idx]
    current = self.obj[label]

    # GH6149 (null slice), GH10408 (full bounds): when every component of
    # ``pi`` is a null slice or a slice with full bounds, the whole item is
    # being reassigned, which is equivalent to a plain setitem on the info
    # axis of a multi-dim object.
    replaces_whole_item = isinstance(pi, tuple) and all(
        is_null_slice(part) or is_full_slice(part, len(self.obj))
        for part in pi)

    if replaces_whole_item:
        updated = v
    else:
        # Partial assignment: work on a copy of the item, since setting
        # may change its dtype.
        current._consolidate_inplace()
        updated = current.copy()
        updated._data = updated._data.setitem(indexer=pi, value=v)
        updated._maybe_update_cacher(clear=True)

    # Write the result back under the same key.
    self.obj[label] = updated
662+
622663
def _align_series(self, indexer, ser, multiindex_indexer=False):
623664
"""
624665
Parameters
@@ -1772,6 +1813,37 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
17721813
raise ValueError("Can only index by location with a [%s]" %
17731814
self._valid_types)
17741815

1816+
def _setter(self, idx, v, indexer, force_loc=False, **kwargs):
    """
    Set a single value on the underlying object. Position-based by default.

    Parameters
    ----------
    idx : int
        The index of the desired element.
    v : any
        The value to assign to the specified location.
    indexer : tuple or list-like
        The original indexer; only its info-axis component is read, and
        only when ``force_loc`` is False.
    force_loc : bool, default False
        If True, skip the positional translation and call the parent
        ``_setter`` with the keyword arguments exactly as given. If False,
        ``items`` is replaced by the positions selected on the info axis
        and the parent ``_setter`` runs while the info-axis labels are
        temporarily swapped out by ``InfoCleaner``.
    **kwargs
        Forwarded to the parent class ``_setter``.
    """
    parent_setter = super(_iLocIndexer, self)._setter

    if force_loc:
        parent_setter(idx, v, **kwargs)
        return

    axis_no = self.obj._info_axis_number
    axis_len = len(self.obj._get_axis(axis_no))
    # Translate the info-axis part of the indexer into explicit positions,
    # which become the "items" the parent setter picks from.
    kwargs['items'] = _maybe_convert_indexer(indexer[axis_no], axis_len)
    with InfoCleaner(self.obj):
        parent_setter(idx, v, **kwargs)
1846+
17751847

17761848
class _ScalarAccessIndexer(_NDFrameIndexer):
17771849
""" access scalars quickly """

0 commit comments

Comments
 (0)