Skip to content

Commit d15c104

Browse files
fersarr authored and jreback committed
BUG: cant modify df with duplicate index (#17105) (#20939)
1 parent 620784f commit d15c104

File tree

3 files changed: +28 additions, -1 deletion

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -1253,6 +1253,7 @@ Indexing
12531253
- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`)
12541254
- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`)
12551255
- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`)
1256+
- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`)
12561257
- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`)
12571258

12581259
MultiIndex

pandas/core/indexing.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1318,7 +1318,7 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False):
13181318
(indexer,
13191319
missing) = labels.get_indexer_non_unique(objarr)
13201320
# 'indexer' has dupes, create 'check' using 'missing'
1321-
check = np.zeros_like(objarr)
1321+
check = np.zeros(len(objarr))
13221322
check[missing] = -1
13231323

13241324
mask = check == -1

pandas/tests/frame/test_indexing.py

+26
Original file line number | Diff line number | Diff line change
@@ -1929,6 +1929,32 @@ def test_iloc_duplicates(self):
19291929
expected = df.take([0], axis=1)
19301930
assert_frame_equal(result, expected)
19311931

1932+
def test_loc_duplicates(self):
1933+
# gh-17105
1934+
1935+
# insert a duplicate element to the index
1936+
trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1),
1937+
end=pd.Timestamp(year=2017, month=1, day=5))
1938+
1939+
trange = trange.insert(loc=5,
1940+
item=pd.Timestamp(year=2017, month=1, day=5))
1941+
1942+
df = pd.DataFrame(0, index=trange, columns=["A", "B"])
1943+
bool_idx = np.array([False, False, False, False, False, True])
1944+
1945+
# assignment
1946+
df.loc[trange[bool_idx], "A"] = 6
1947+
1948+
expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6],
1949+
'B': [0, 0, 0, 0, 0, 0]},
1950+
index=trange)
1951+
tm.assert_frame_equal(df, expected)
1952+
1953+
# in-place
1954+
df = pd.DataFrame(0, index=trange, columns=["A", "B"])
1955+
df.loc[trange[bool_idx], "A"] += 6
1956+
tm.assert_frame_equal(df, expected)
1957+
19321958
def test_iloc_sparse_propegate_fill_value(self):
19331959
from pandas.core.sparse.api import SparseDataFrame
19341960
df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)

0 commit comments

Comments
 (0)