Skip to content

Commit d0e2a9f

Browse files
committed
Merge pull request #6495 from jreback/iat
BUG: Bug in iat/iloc with duplicate indices on a Series (6493)
2 parents 8cd9819 + a54eae5 commit d0e2a9f

File tree

9 files changed

+103
-28
lines changed

9 files changed

+103
-28
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ Bug Fixes
183183
- Bug in ``io.data.DataReader`` when passed ``"F-F_Momentum_Factor"`` and ``data_source="famafrench"`` (:issue:`6460`)
184184
- Bug in ``sum`` of a ``timedelta64[ns]`` series (:issue:`6462`)
185185
- Bug in ``resample`` with a timezone and certain offsets (:issue:`6397`)
186+
- Bug in ``iat/iloc`` with duplicate indices on a Series (:issue:`6493`)
186187

187188
pandas 0.13.1
188189
-------------

pandas/core/frame.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1519,24 +1519,30 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover
15191519
#----------------------------------------------------------------------
15201520
# Getting and setting elements
15211521

1522-
def get_value(self, index, col):
1522+
def get_value(self, index, col, takeable=False):
15231523
"""
15241524
Quickly retrieve single value at passed column and index
15251525
15261526
Parameters
15271527
----------
15281528
index : row label
15291529
col : column label
1530+
takeable : interpret the index/col as indexers, default False
15301531
15311532
Returns
15321533
-------
15331534
value : scalar value
15341535
"""
1536+
1537+
if takeable is True:
1538+
series = self._iget_item_cache(col)
1539+
return series.values[index]
1540+
15351541
series = self._get_item_cache(col)
15361542
engine = self.index._engine
15371543
return engine.get_value(series.values, index)
15381544

1539-
def set_value(self, index, col, value):
1545+
def set_value(self, index, col, value, takeable=False):
15401546
"""
15411547
Put single value at passed column and index
15421548
@@ -1545,6 +1551,7 @@ def set_value(self, index, col, value):
15451551
index : row label
15461552
col : column label
15471553
value : scalar value
1554+
takeable : interpret the index/col as indexers, default False
15481555
15491556
Returns
15501557
-------
@@ -1553,6 +1560,10 @@ def set_value(self, index, col, value):
15531560
otherwise a new object
15541561
"""
15551562
try:
1563+
if takeable is True:
1564+
series = self._iget_item_cache(col)
1565+
return series.set_value(index, value, takeable=True)
1566+
15561567
series = self._get_item_cache(col)
15571568
engine = self.index._engine
15581569
engine.set_value(series.values, index, value)

pandas/core/generic.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,7 @@ def __getitem__(self, item):
10041004
return self._get_item_cache(item)
10051005

10061006
def _get_item_cache(self, item):
1007+
""" return the cached item, item represents a label indexer """
10071008
cache = self._item_cache
10081009
res = cache.get(item)
10091010
if res is None:
@@ -1021,6 +1022,15 @@ def _set_as_cached(self, item, cacher):
10211022
a weakref to cacher """
10221023
self._cacher = (item, weakref.ref(cacher))
10231024

1025+
def _iget_item_cache(self, item):
1026+
""" return the cached item, item represents a positional indexer """
1027+
ax = self._info_axis
1028+
if ax.is_unique:
1029+
lower = self._get_item_cache(ax[item])
1030+
else:
1031+
lower = self.take(item, axis=self._info_axis_number, convert=True)
1032+
return lower
1033+
10241034
def _box_item_values(self, key, values):
10251035
raise NotImplementedError
10261036

@@ -1595,7 +1605,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy,
15951605

15961606
obj = obj._reindex_with_indexers(
15971607
{axis: [new_index, indexer]}, method=method,
1598-
fill_value=fill_value, limit=limit, copy=copy)
1608+
fill_value=fill_value, limit=limit, copy=copy,
1609+
allow_dups=takeable)
15991610

16001611
return obj
16011612

pandas/core/indexing.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -1419,41 +1419,40 @@ def __getitem__(self, key):
14191419
raise ValueError('Invalid call for scalar access (getting)!')
14201420

14211421
key = self._convert_key(key)
1422-
return self.obj.get_value(*key)
1422+
return self.obj.get_value(*key, takeable=self._takeable)
14231423

14241424
def __setitem__(self, key, value):
14251425
if not isinstance(key, tuple):
14261426
key = self._tuplify(key)
14271427
if len(key) != self.obj.ndim:
14281428
raise ValueError('Not enough indexers for scalar access '
14291429
'(setting)!')
1430-
key = self._convert_key(key)
1430+
key = list(self._convert_key(key))
14311431
key.append(value)
1432-
self.obj.set_value(*key)
1432+
self.obj.set_value(*key, takeable=self._takeable)
14331433

14341434

14351435
class _AtIndexer(_ScalarAccessIndexer):
14361436

14371437
""" label based scalar accessor """
1438-
pass
1438+
_takeable = False
14391439

14401440

14411441
class _iAtIndexer(_ScalarAccessIndexer):
14421442

14431443
""" integer based scalar accessor """
1444+
_takeable = True
14441445

14451446
def _has_valid_setitem_indexer(self, indexer):
14461447
self._has_valid_positional_setitem_indexer(indexer)
14471448

14481449
def _convert_key(self, key):
14491450
""" require integer args (positions are passed through unchanged) """
1450-
ckey = []
14511451
for a, i in zip(self.obj.axes, key):
14521452
if not com.is_integer(i):
14531453
raise ValueError("iAt based indexing can only have integer "
14541454
"indexers")
1455-
ckey.append(a[i])
1456-
return ckey
1455+
return key
14571456

14581457
# 32-bit floating point machine epsilon
14591458
_eps = np.finfo('f4').eps

pandas/core/panel.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ def as_matrix(self):
444444
#----------------------------------------------------------------------
445445
# Getting and setting elements
446446

447-
def get_value(self, *args):
447+
def get_value(self, *args, **kwargs):
448448
"""
449449
Quickly retrieve single value at (item, major, minor) location
450450
@@ -453,6 +453,7 @@ def get_value(self, *args):
453453
item : item label (panel item)
454454
major : major axis label (panel item row)
455455
minor : minor axis label (panel item column)
456+
takeable : interpret the passed labels as indexers, default False
456457
457458
Returns
458459
-------
@@ -466,12 +467,16 @@ def get_value(self, *args):
466467
raise TypeError('There must be an argument for each axis, you gave'
467468
' {0} args, but {1} are required'.format(nargs,
468469
nreq))
470+
takeable = kwargs.get('takeable')
469471

470-
# hm, two layers to the onion
471-
frame = self._get_item_cache(args[0])
472-
return frame.get_value(*args[1:])
472+
if takeable is True:
473+
lower = self._iget_item_cache(args[0])
474+
else:
475+
lower = self._get_item_cache(args[0])
476+
477+
return lower.get_value(*args[1:], takeable=takeable)
473478

474-
def set_value(self, *args):
479+
def set_value(self, *args, **kwargs):
475480
"""
476481
Quickly set single value at (item, major, minor) location
477482
@@ -481,6 +486,7 @@ def set_value(self, *args):
481486
major : major axis label (panel item row)
482487
minor : minor axis label (panel item column)
483488
value : scalar
489+
takeable : interpret the passed labels as indexers, default False
484490
485491
Returns
486492
-------
@@ -496,10 +502,15 @@ def set_value(self, *args):
496502
raise TypeError('There must be an argument for each axis plus the '
497503
'value provided, you gave {0} args, but {1} are '
498504
'required'.format(nargs, nreq))
505+
takeable = kwargs.get('takeable')
499506

500507
try:
501-
frame = self._get_item_cache(args[0])
502-
frame.set_value(*args[1:])
508+
if takeable is True:
509+
lower = self._iget_item_cache(args[0])
510+
else:
511+
lower = self._get_item_cache(args[0])
512+
513+
lower.set_value(*args[1:], takeable=takeable)
503514
return self
504515
except KeyError:
505516
axes = self._expand_axes(args)

pandas/core/series.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -725,21 +725,24 @@ def reshape(self, *args, **kwargs):
725725
iget = _ixs
726726
irow = _ixs
727727

728-
def get_value(self, label):
728+
def get_value(self, label, takeable=False):
729729
"""
730730
Quickly retrieve single value at passed index label
731731
732732
Parameters
733733
----------
734734
index : label
735+
takeable : interpret the index as indexers, default False
735736
736737
Returns
737738
-------
738739
value : scalar value
739740
"""
741+
if takeable is True:
742+
return self.values[label]
740743
return self.index.get_value(self.values, label)
741744

742-
def set_value(self, label, value):
745+
def set_value(self, label, value, takeable=False):
743746
"""
744747
Quickly set single value at passed label. If label is not contained, a
745748
new object is created with the label placed at the end of the result
@@ -751,6 +754,7 @@ def set_value(self, label, value):
751754
Partial indexing with MultiIndex not allowed
752755
value : object
753756
Scalar value
757+
takeable : interpret the index as indexers, default False
754758
755759
Returns
756760
-------
@@ -759,7 +763,10 @@ def set_value(self, label, value):
759763
otherwise a new object
760764
"""
761765
try:
762-
self.index._engine.set_value(self.values, label, value)
766+
if takeable:
767+
self.values[label] = value
768+
else:
769+
self.index._engine.set_value(self.values, label, value)
763770
return self
764771
except KeyError:
765772

pandas/sparse/frame.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,15 @@ def __getitem__(self, key):
346346
return self._get_item_cache(key)
347347

348348
@Appender(DataFrame.get_value.__doc__, indents=0)
349-
def get_value(self, index, col):
350-
return self._get_item_cache(col).get_value(index)
349+
def get_value(self, index, col, takeable=False):
350+
if takeable is True:
351+
series = self._iget_item_cache(col)
352+
else:
353+
series = self._get_item_cache(col)
354+
355+
return series.get_value(index, takeable=takeable)
351356

352-
def set_value(self, index, col, value):
357+
def set_value(self, index, col, value, takeable=False):
353358
"""
354359
Put single value at passed column and index
355360
@@ -358,6 +363,7 @@ def set_value(self, index, col, value):
358363
index : row label
359364
col : column label
360365
value : scalar value
366+
takeable : interpret the index/col as indexers, default False
361367
362368
Notes
363369
-----
@@ -369,7 +375,7 @@ def set_value(self, index, col, value):
369375
-------
370376
frame : DataFrame
371377
"""
372-
dense = self.to_dense().set_value(index, col, value)
378+
dense = self.to_dense().set_value(index, col, value, takeable=takeable)
373379
return dense.to_sparse(kind=self._default_kind,
374380
fill_value=self._default_fill_value)
375381

pandas/sparse/series.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -409,22 +409,23 @@ def get(self, label, default=None):
409409
else:
410410
return default
411411

412-
def get_value(self, label):
412+
def get_value(self, label, takeable=False):
413413
"""
414414
Retrieve single value at passed index label
415415
416416
Parameters
417417
----------
418418
index : label
419+
takeable : interpret the index as indexers, default False
419420
420421
Returns
421422
-------
422423
value : scalar value
423424
"""
424-
loc = self.index.get_loc(label)
425+
loc = label if takeable is True else self.index.get_loc(label)
425426
return self._get_val_at(loc)
426427

427-
def set_value(self, label, value):
428+
def set_value(self, label, value, takeable=False):
428429
"""
429430
Quickly set single value at passed label. If label is not contained, a
430431
new object is created with the label placed at the end of the result
@@ -436,6 +437,7 @@ def set_value(self, label, value):
436437
Partial indexing with MultiIndex not allowed
437438
value : object
438439
Scalar value
440+
takeable : interpret the index as indexers, default False
439441
440442
Notes
441443
-----
@@ -450,7 +452,7 @@ def set_value(self, label, value):
450452

451453
# if the label doesn't exist, we will create a new object here
452454
# and possibly change the index
453-
new_values = values.set_value(label, value)
455+
new_values = values.set_value(label, value, takeable=takeable)
454456
if new_values is not None:
455457
values = new_values
456458
new_index = values.index

pandas/tests/test_indexing.py

+27
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,33 @@ def test_at_timestamp(self):
334334
def test_iat_invalid_args(self):
335335
pass
336336

337+
def test_imethods_with_dups(self):
338+
339+
# GH6493
340+
# iat/iloc with dups
341+
342+
s = Series(range(5), index=[1,1,2,2,3])
343+
result = s.iloc[2]
344+
self.assertEqual(result,2)
345+
result = s.iat[2]
346+
self.assertEqual(result,2)
347+
348+
self.assertRaises(IndexError, lambda : s.iat[10])
349+
self.assertRaises(IndexError, lambda : s.iat[-10])
350+
351+
result = s.iloc[[2,3]]
352+
expected = Series([2,3],[2,2],dtype='int64')
353+
assert_series_equal(result,expected)
354+
355+
df = s.to_frame()
356+
result = df.iloc[2]
357+
expected = Series(2,index=[0])
358+
assert_series_equal(result,expected)
359+
360+
result = df.iat[2,0]
361+
expected = 2
362+
self.assertEqual(result,2)
363+
337364
def test_repeated_getitem_dups(self):
338365
# GH 5678
339366
# repeated getitems on a dup index returning an ndarray

0 commit comments

Comments
 (0)