Skip to content

Commit 094761f

Browse files
Saravia RajalSaravia Rajal
Saravia Rajal
authored and
Saravia Rajal
committed
BUG: can't modify df with duplicate index (pandas-dev#17105)
1 parent b02c69a commit 094761f

File tree

3 files changed

+46
-1
lines changed

3 files changed

+46
-1
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,7 @@ Indexing
12431243
- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`)
12441244
- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`)
12451245
- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`)
1246+
- Bug in ``.loc`` assignment and in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`)
12461247

12471248
MultiIndex
12481249
^^^^^^^^^^

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1319,7 +1319,7 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False):
13191319
(indexer,
13201320
missing) = labels.get_indexer_non_unique(objarr)
13211321
# 'indexer' has dupes, create 'check' using 'missing'
1322-
check = np.zeros_like(objarr)
1322+
check = np.zeros(len(objarr))
13231323
check[missing] = -1
13241324

13251325
mask = check == -1

pandas/tests/indexing/test_loc.py

+44
Original file line numberDiff line numberDiff line change
@@ -784,3 +784,47 @@ def convert_nested_indexer(indexer_type, keys):
784784
index=pd.MultiIndex.from_product(keys))
785785

786786
tm.assert_series_equal(result, expected)
787+
788+
def test_modify_with_duplicate_index_assigning(self):
789+
""" see issue #17105 """
790+
791+
trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1),
792+
end=pd.Timestamp(year=2017, month=1, day=5))
793+
794+
# insert a duplicate element to the index
795+
trange = trange.insert(loc=5,
796+
item=pd.Timestamp(year=2017, month=1, day=5))
797+
798+
df = pd.DataFrame(0, index=trange, columns=["A", "B"])
799+
bool_idx = np.array([False, False, False, False, False, True])
800+
801+
# modify the value for the duplicate index entry
802+
df.loc[trange[bool_idx], "A"] = 6
803+
804+
expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6],
805+
'B': [0, 0, 0, 0, 0, 0]},
806+
index=trange)
807+
808+
tm.assert_frame_equal(df, expected)
809+
810+
def test_modify_with_duplicate_index_adding(self):
811+
""" see issue #17105 """
812+
813+
trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1),
814+
end=pd.Timestamp(year=2017, month=1, day=5))
815+
816+
# insert a duplicate element to the index
817+
trange = trange.insert(loc=5,
818+
item=pd.Timestamp(year=2017, month=1, day=5))
819+
820+
df = pd.DataFrame(0, index=trange, columns=["A", "B"])
821+
bool_idx = np.array([False, False, False, False, False, True])
822+
823+
# modify the value for the duplicate index entry
824+
df.loc[trange[bool_idx], "A"] += 7
825+
826+
expected = pd.DataFrame({'A': [0, 0, 0, 0, 7, 7],
827+
'B': [0, 0, 0, 0, 0, 0]},
828+
index=trange)
829+
830+
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)