diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index e3b4eb5e22dec..c1f588b0072fd 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1253,6 +1253,7 @@ Indexing
 - Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`)
 - Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`)
 - Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`)
+- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`)
 - Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`)
 
 MultiIndex
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 7a7e47803c240..858d08d73e603 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1318,7 +1318,7 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False):
 
                     (indexer, missing) = labels.get_indexer_non_unique(objarr)
                     # 'indexer' has dupes, create 'check' using 'missing'
-                    check = np.zeros_like(objarr)
+                    check = np.zeros(len(objarr), dtype=np.intp)
                     check[missing] = -1
 
                 mask = check == -1
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index b887b1c9f1218..6d74ce54faa94 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -1929,6 +1929,32 @@ def test_iloc_duplicates(self):
         expected = df.take([0], axis=1)
         assert_frame_equal(result, expected)
 
+    def test_loc_duplicates(self):
+        # gh-17105: .loc assignment and in-place ops on a duplicated label
+
+        # append a second 2017-01-05 so the last label appears twice
+        trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1),
+                               end=pd.Timestamp(year=2017, month=1, day=5))
+
+        trange = trange.insert(loc=5,
+                               item=pd.Timestamp(year=2017, month=1, day=5))
+
+        df = pd.DataFrame(0, index=trange, columns=["A", "B"])
+        bool_idx = np.array([False, False, False, False, False, True])
+
+        # plain assignment must reach both rows sharing the selected label
+        df.loc[trange[bool_idx], "A"] = 6
+
+        expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6],
+                                 'B': [0, 0, 0, 0, 0, 0]},
+                                index=trange)
+        tm.assert_frame_equal(df, expected)
+
+        # in-place addition on a fresh zero frame must give the same result
+        df = pd.DataFrame(0, index=trange, columns=["A", "B"])
+        df.loc[trange[bool_idx], "A"] += 6
+        tm.assert_frame_equal(df, expected)
+
     def test_iloc_sparse_propegate_fill_value(self):
         from pandas.core.sparse.api import SparseDataFrame
         df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)