Skip to content

Commit b3096bb

Browse files
committed
BUG: Fix iloc with duplicate labels
closes pandas-dev#15686
1 parent 0ea0f25 commit b3096bb

File tree

3 files changed

+127
-34
lines changed

3 files changed

+127
-34
lines changed

doc/source/whatsnew/v0.20.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ Indexing
4646
^^^^^^^^
4747

4848
- Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`)
49-
49+
- Bug in ``DataFrame.iloc`` with duplicate labels (:issue:`15686`)
5050

5151
I/O
5252
^^^

pandas/core/dtypes/cast.py

+21
Original file line numberDiff line numberDiff line change
@@ -1026,3 +1026,24 @@ def find_common_type(types):
10261026
return np.object
10271027

10281028
return np.find_common_type(types, [])
1029+
1030+
1031+
def _maybe_convert_indexer(indexer, until):
    """
    Convert slice, tuple, list or scalar "indexer" to 1-d array of indices,
    using "until" as the length of the axis being indexed.

    Parameters
    ----------
    indexer : scalar, slice, np.ndarray, list or tuple
        The positional indexer to normalise. Boolean arrays and boolean
        lists/tuples are interpreted as masks.
    until : int
        Length of the axis; slices are resolved against ``range(until)``.

    Returns
    -------
    np.ndarray
        1-d array of positions. A non-boolean ndarray is returned as is.
    """
    if is_scalar(indexer):
        return np.array([indexer], dtype=int)

    if isinstance(indexer, np.ndarray):
        if indexer.dtype == bool:
            return np.where(indexer)[0]
        return indexer

    if isinstance(indexer, slice):
        # Resolve the slice against the full axis rather than against
        # arange(stop): negative stops, negative steps with an explicit stop
        # and stops beyond the axis length are then handled by the regular
        # slicing rules instead of producing empty or out-of-range results.
        return np.arange(until, dtype=int)[indexer]

    # list / tuple: a boolean sequence is a mask, exactly like a boolean
    # ndarray, and must not be cast to the integers 0 and 1.
    values = np.asarray(indexer)
    if values.dtype == bool:
        return np.where(values)[0]
    return np.array(indexer, dtype=int)

pandas/core/indexing.py

File mode changed: 100755 → 100644
+105-33
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
_is_unorderable_exception,
1717
_ensure_platform_int)
1818
from pandas.core.dtypes.missing import isnull, _infer_fill_value
19+
from pandas.core.dtypes.cast import _maybe_convert_indexer
1920

2021
from pandas.core.index import Index, MultiIndex
2122

@@ -81,6 +82,24 @@ def __getitem__(self, arg):
8182
IndexSlice = _IndexSlice()
8283

8384

85+
class InfoCleaner(object):
    """
    A context manager which temporarily removes labels on the "info" axis,
    replacing them with ``range(len(labels))``, and then puts them back in
    place.
    Used to unambiguously index by position.

    Parameters
    ----------
    obj : object
        Must expose ``_AXIS_NAMES`` and ``_info_axis_number``; the entry
        ``_AXIS_NAMES[_info_axis_number]`` is the name of the attribute
        holding the info-axis labels.
    """

    def __init__(self, obj):
        self._obj = obj
        # name of the attribute that stores the info-axis labels
        self._info_axis = self._obj._AXIS_NAMES[self._obj._info_axis_number]

    def __enter__(self):
        # remember the real labels so that __exit__ can restore them
        self._old_col = getattr(self._obj, self._info_axis)
        setattr(self._obj, self._info_axis, range(len(self._old_col)))
        return self

    def __exit__(self, *args):
        # always restore the original labels; returning False lets any
        # exception raised inside the "with" body propagate
        setattr(self._obj, self._info_axis, self._old_col)
        return False
101+
102+
84103
class IndexingError(Exception):
85104
pass
86105

@@ -492,29 +511,10 @@ def _setitem_with_indexer(self, indexer, value):
492511
else:
493512
lplane_indexer = 0
494513

495-
def setter(item, v):
496-
s = self.obj[item]
497-
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
498-
499-
# perform the equivalent of a setitem on the info axis
500-
# as we have a null slice or a slice with full bounds
501-
# which means essentially reassign to the columns of a
502-
# multi-dim object
503-
# GH6149 (null slice), GH10408 (full bounds)
504-
if (isinstance(pi, tuple) and
505-
all(is_null_slice(idx) or
506-
is_full_slice(idx, len(self.obj))
507-
for idx in pi)):
508-
s = v
509-
else:
510-
# set the item, possibly having a dtype change
511-
s._consolidate_inplace()
512-
s = s.copy()
513-
s._data = s._data.setitem(indexer=pi, value=v)
514-
s._maybe_update_cacher(clear=True)
515-
516-
# reset the sliced object if unique
517-
self.obj[item] = s
514+
setter_kwargs = {'items': labels,
515+
'indexer': indexer,
516+
'pi': plane_indexer[0] if lplane_indexer == 1
517+
else plane_indexer}
518518

519519
def can_do_equal_len():
520520
""" return True if we have an equal len settable """
@@ -542,7 +542,7 @@ def can_do_equal_len():
542542
sub_indexer = list(indexer)
543543
multiindex_indexer = isinstance(labels, MultiIndex)
544544

545-
for item in labels:
545+
for idx, item in enumerate(labels):
546546
if item in value:
547547
sub_indexer[info_axis] = item
548548
v = self._align_series(
@@ -551,7 +551,7 @@ def can_do_equal_len():
551551
else:
552552
v = np.nan
553553

554-
setter(item, v)
554+
self._setter(idx, v, force_loc=True, **setter_kwargs)
555555

556556
# we have an equal len ndarray/convertible to our labels
557557
elif np.array(value).ndim == 2:
@@ -563,14 +563,15 @@ def can_do_equal_len():
563563
raise ValueError('Must have equal len keys and value '
564564
'when setting with an ndarray')
565565

566-
for i, item in enumerate(labels):
566+
for i in range(len(labels)):
567567

568568
# setting with a list, recoerces
569-
setter(item, value[:, i].tolist())
569+
self._setter(i, value[:, i].tolist(), force_loc=True,
570+
**setter_kwargs)
570571

571572
# we have an equal len list/ndarray
572573
elif can_do_equal_len():
573-
setter(labels[0], value)
574+
self._setter(0, value, **setter_kwargs)
574575

575576
# per label values
576577
else:
@@ -579,13 +580,12 @@ def can_do_equal_len():
579580
raise ValueError('Must have equal len keys and value '
580581
'when setting with an iterable')
581582

582-
for item, v in zip(labels, value):
583-
setter(item, v)
583+
for i, v in zip(range(len(labels)), value):
584+
self._setter(i, v, **setter_kwargs)
584585
else:
585-
586586
# scalar
587-
for item in labels:
588-
setter(item, value)
587+
for idx in range(len(labels)):
588+
self._setter(idx, value, **setter_kwargs)
589589

590590
else:
591591
if isinstance(indexer, tuple):
@@ -619,6 +619,47 @@ def can_do_equal_len():
619619
value=value)
620620
self.obj._maybe_update_cacher(clear=True)
621621

622+
def _setter(self, idx, v, items, pi, **kwargs):
    """
    Assign ``v`` to one item of the underlying object, addressed by label.

    Parameters
    ----------
    idx : int
        Position, inside ``items``, of the label to assign to
    v : any
        The value to assign
    items : list-like
        The labels; ``items[idx]`` is the one being set
    pi : tuple or list-like
        Components of the original indexer preceding the info axis
    **kwargs
        Accepted and ignored, so that callers can pass one shared set of
        keyword arguments to every ``_setter`` implementation
    """
    label = items[idx]
    current = self.obj[label]

    # A null slice (GH6149) or a slice with full bounds (GH10408) on every
    # leading component means the whole item is being reassigned, i.e. the
    # equivalent of a plain setitem on the info axis of a multi-dim object.
    replaces_whole_item = isinstance(pi, tuple) and all(
        is_null_slice(part) or is_full_slice(part, len(self.obj))
        for part in pi)

    if replaces_whole_item:
        updated = v
    else:
        # partial assignment on a copy; the dtype may change as a result
        current._consolidate_inplace()
        updated = current.copy()
        updated._data = updated._data.setitem(indexer=pi, value=v)
        updated._maybe_update_cacher(clear=True)

    # write the (possibly new) item back under its label
    self.obj[label] = updated
662+
622663
def _align_series(self, indexer, ser, multiindex_indexer=False):
623664
"""
624665
Parameters
@@ -1766,6 +1807,37 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
17661807
raise ValueError("Can only index by location with a [%s]" %
17671808
self._valid_types)
17681809

1810+
def _setter(self, idx, v, indexer, force_loc=False, **kwargs):
    """
    Assign ``v`` to one item of the underlying object, by position unless
    ``force_loc`` is set.

    Parameters
    ----------
    idx : int
        The index of the desired element
    v : any
        The value to assign to the specified location
    indexer : list or tuple
        The original indexer; only its info-axis component is read here
    force_loc : bool, default False
        If True, delegate straight to the parent ``_setter`` with the
        keyword arguments unchanged. If False, ``items`` is replaced by the
        positions derived from the info-axis component of ``indexer`` and
        the parent ``_setter`` runs while the info-axis labels are
        temporarily swapped out by ``InfoCleaner``.
    **kwargs
        Forwarded to the parent class ``_setter``
    """
    delegate = super(_iLocIndexer, self)._setter

    if force_loc:
        delegate(idx, v, **kwargs)
        return

    axis_number = self.obj._info_axis_number
    axis_length = len(self.obj._get_axis(axis_number))
    kwargs['items'] = _maybe_convert_indexer(indexer[axis_number],
                                             axis_length)
    with InfoCleaner(self.obj):
        delegate(idx, v, **kwargs)
1840+
17691841

17701842
class _ScalarAccessIndexer(_NDFrameIndexer):
17711843
""" access scalars quickly """

0 commit comments

Comments
 (0)