Skip to content

Commit 9ae43ba

Browse files
committed
BUG: Fix iloc with duplicate labels
closes pandas-dev#15686
1 parent a20009f commit 9ae43ba

File tree

3 files changed

+127
-33
lines changed

3 files changed

+127
-33
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,7 @@ Bug Fixes
821821
- Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`)
822822
- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`)
823823
- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`)
824+
- Bug in setting values with ``DataFrame.iloc`` when the info axis (columns) contains duplicate labels (:issue:`15686`)
824825
- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`)
825826
- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`)
826827
- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`)

pandas/core/indexing.py

+105-33
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
_is_unorderable_exception,
1616
_ensure_platform_int)
1717
from pandas.types.missing import isnull, _infer_fill_value
18+
from pandas.types.cast import _maybe_convert_indexer
1819

1920
from pandas.core.index import Index, MultiIndex
2021

@@ -80,6 +81,24 @@ def __getitem__(self, arg):
8081
IndexSlice = _IndexSlice()
8182

8283

84+
class InfoCleaner(object):
    """
    A context manager which temporarily removes labels on the "info" axis,
    replacing them with ``range(len(labels))``, and then puts them back in
    place. Used to unambiguously index by position.

    Parameters
    ----------
    obj : object
        Must expose ``_AXIS_NAMES`` (mapping axis number -> attribute name)
        and ``_info_axis_number``; the attribute named by
        ``_AXIS_NAMES[_info_axis_number]`` is the one swapped out.

    Notes
    -----
    The original labels are restored on exit even when the body raises;
    exceptions are never suppressed.
    """

    def __init__(self, obj):
        self._obj = obj
        self._info_axis = obj._AXIS_NAMES[obj._info_axis_number]

    def __enter__(self):
        # Remember the real labels so __exit__ can put them back.
        self._old_col = getattr(self._obj, self._info_axis)
        setattr(self._obj, self._info_axis, range(len(self._old_col)))
        # Return the manager so ``with InfoCleaner(obj) as c`` binds
        # something useful instead of None.
        return self

    def __exit__(self, *args):
        # Implicitly returns None, so any in-flight exception propagates.
        setattr(self._obj, self._info_axis, self._old_col)
100+
101+
83102
class IndexingError(Exception):
84103
pass
85104

@@ -491,29 +510,10 @@ def _setitem_with_indexer(self, indexer, value):
491510
else:
492511
lplane_indexer = 0
493512

494-
def setter(item, v):
495-
s = self.obj[item]
496-
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
497-
498-
# perform the equivalent of a setitem on the info axis
499-
# as we have a null slice or a slice with full bounds
500-
# which means essentially reassign to the columns of a
501-
# multi-dim object
502-
# GH6149 (null slice), GH10408 (full bounds)
503-
if (isinstance(pi, tuple) and
504-
all(is_null_slice(idx) or
505-
is_full_slice(idx, len(self.obj))
506-
for idx in pi)):
507-
s = v
508-
else:
509-
# set the item, possibly having a dtype change
510-
s._consolidate_inplace()
511-
s = s.copy()
512-
s._data = s._data.setitem(indexer=pi, value=v)
513-
s._maybe_update_cacher(clear=True)
514-
515-
# reset the sliced object if unique
516-
self.obj[item] = s
513+
setter_kwargs = {'items': labels,
514+
'indexer': indexer,
515+
'pi': plane_indexer[0] if lplane_indexer == 1
516+
else plane_indexer}
517517

518518
def can_do_equal_len():
519519
""" return True if we have an equal len settable """
@@ -541,7 +541,7 @@ def can_do_equal_len():
541541
sub_indexer = list(indexer)
542542
multiindex_indexer = isinstance(labels, MultiIndex)
543543

544-
for item in labels:
544+
for idx, item in enumerate(labels):
545545
if item in value:
546546
sub_indexer[info_axis] = item
547547
v = self._align_series(
@@ -550,7 +550,7 @@ def can_do_equal_len():
550550
else:
551551
v = np.nan
552552

553-
setter(item, v)
553+
self._setter(idx, v, force_loc=True, **setter_kwargs)
554554

555555
# we have an equal len ndarray/convertible to our labels
556556
elif np.array(value).ndim == 2:
@@ -562,14 +562,15 @@ def can_do_equal_len():
562562
raise ValueError('Must have equal len keys and value '
563563
'when setting with an ndarray')
564564

565-
for i, item in enumerate(labels):
565+
for i in range(len(labels)):
566566

567567
# setting with a list, recoerces
568-
setter(item, value[:, i].tolist())
568+
self._setter(i, value[:, i].tolist(), force_loc=True,
569+
**setter_kwargs)
569570

570571
# we have an equal len list/ndarray
571572
elif can_do_equal_len():
572-
setter(labels[0], value)
573+
self._setter(0, value, **setter_kwargs)
573574

574575
# per label values
575576
else:
@@ -578,13 +579,12 @@ def can_do_equal_len():
578579
raise ValueError('Must have equal len keys and value '
579580
'when setting with an iterable')
580581

581-
for item, v in zip(labels, value):
582-
setter(item, v)
582+
for i, v in zip(range(len(labels)), value):
583+
self._setter(i, v, **setter_kwargs)
583584
else:
584-
585585
# scalar
586-
for item in labels:
587-
setter(item, value)
586+
for idx in range(len(labels)):
587+
self._setter(idx, value, **setter_kwargs)
588588

589589
else:
590590
if isinstance(indexer, tuple):
@@ -618,6 +618,47 @@ def can_do_equal_len():
618618
value=value)
619619
self.obj._maybe_update_cacher(clear=True)
620620

621+
def _setter(self, idx, v, items, pi, **kwargs):
622+
"""
623+
Set a single value on the underlying object. Label-based.
624+
625+
Parameters
626+
----------
627+
idx : int
628+
The index of the desired element inside "items"
629+
630+
v : any
631+
The value to assign to the specified location
632+
633+
items: list
634+
A list of labels
635+
636+
pi: tuple or list-like
637+
Components of original indexer preceding the info axis
638+
"""
639+
item = items[idx]
640+
s = self.obj[item]
641+
642+
# perform the equivalent of a setitem on the info axis
643+
# as we have a null slice or a slice with full bounds
644+
# which means essentially reassign to the columns of a
645+
# multi-dim object
646+
# GH6149 (null slice), GH10408 (full bounds)
647+
if (isinstance(pi, tuple) and
648+
all(is_null_slice(ix) or
649+
is_full_slice(ix, len(self.obj))
650+
for ix in pi)):
651+
s = v
652+
else:
653+
# set the item, possibly having a dtype change
654+
s._consolidate_inplace()
655+
s = s.copy()
656+
s._data = s._data.setitem(indexer=pi, value=v)
657+
s._maybe_update_cacher(clear=True)
658+
659+
# reset the sliced object if unique
660+
self.obj[item] = s
661+
621662
def _align_series(self, indexer, ser, multiindex_indexer=False):
622663
"""
623664
Parameters
@@ -1761,6 +1802,37 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
17611802
raise ValueError("Can only index by location with a [%s]" %
17621803
self._valid_types)
17631804

1805+
def _setter(self, idx, v, indexer, force_loc=False, **kwargs):
    """
    Assign ``v`` to one item of the underlying object, by position unless
    ``force_loc`` is set.

    Parameters
    ----------
    idx : int
        Position of the desired element among the items being set
    v : any
        Value to store
    indexer : sequence
        The original indexer; its info-axis component is converted to an
        array of integer positions when ``force_loc`` is False
    force_loc : bool, default False
        If True, call the parent ``_setter`` unchanged with the forwarded
        keyword arguments (label-based lookup through ``items``).
        If False, replace ``items`` with positions derived from
        ``indexer`` and run the parent ``_setter`` while ``InfoCleaner``
        has swapped the info-axis labels for positional ones.
    **kwargs
        Forwarded to the parent ``_setter``
    """
    parent_setter = super(_iLocIndexer, self)._setter

    if force_loc:
        parent_setter(idx, v, **kwargs)
        return

    axis_number = self.obj._info_axis_number
    axis_length = len(self.obj._get_axis(axis_number))
    kwargs['items'] = _maybe_convert_indexer(indexer[axis_number],
                                             axis_length)
    # With the labels temporarily replaced by positions, the label-based
    # parent implementation addresses exactly one item per position.
    with InfoCleaner(self.obj):
        parent_setter(idx, v, **kwargs)
1835+
17641836

17651837
class _ScalarAccessIndexer(_NDFrameIndexer):
17661838
""" access scalars quickly """

pandas/types/cast.py

+21
Original file line numberDiff line numberDiff line change
@@ -939,3 +939,24 @@ def _find_common_type(types):
939939
return np.object
940940

941941
return np.find_common_type(types, [])
942+
943+
944+
def _maybe_convert_indexer(indexer, until):
945+
"""
946+
Convert slice, tuple, list or scalar "indexer" to 1-d array of indices,
947+
using "until" as maximum for upwards open slices.
948+
"""
949+
950+
if is_scalar(indexer):
951+
return np.array([indexer], dtype=int)
952+
953+
if isinstance(indexer, np.ndarray):
954+
if indexer.dtype == bool:
955+
return np.where(indexer)[0]
956+
return indexer
957+
958+
if isinstance(indexer, slice):
959+
stop = until if indexer.stop is None else indexer.stop
960+
return np.arange(stop, dtype=int)[indexer]
961+
962+
return np.array(indexer, dtype=int)

0 commit comments

Comments
 (0)