From 094761ff17dc866e1aaf9093cf50ab5f1d7c94dc Mon Sep 17 00:00:00 2001 From: Saravia Rajal Date: Thu, 3 May 2018 08:59:15 +0100 Subject: [PATCH 1/3] BUG: can't modify df with duplicate index (#17105) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexing.py | 2 +- pandas/tests/indexing/test_loc.py | 44 +++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index d3746d9e0b61e..f2a90213d13ea 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1243,6 +1243,7 @@ Indexing - Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) - Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`) - Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) +- Bug in performing in-place operations on a DataFrame with a duplicate Index (:issue:`17105`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d23beba1c534d..769f4951b385a 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1319,7 +1319,7 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False): (indexer, missing) = labels.get_indexer_non_unique(objarr) # 'indexer' has dupes, create 'check' using 'missing' - check = np.zeros_like(objarr) + check = np.zeros(len(objarr)) check[missing] = -1 mask = check == -1 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6ccff7e898a6a..100233ab33d51 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -784,3 +784,47 @@ def convert_nested_indexer(indexer_type, keys): index=pd.MultiIndex.from_product(keys)) tm.assert_series_equal(result, expected) + + def test_modify_with_duplicate_index_assigning(self): + """ see 
issue #17105 """ + + trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1), + end=pd.Timestamp(year=2017, month=1, day=5)) + + # insert a duplicate element to the index + trange = trange.insert(loc=5, + item=pd.Timestamp(year=2017, month=1, day=5)) + + df = pd.DataFrame(0, index=trange, columns=["A", "B"]) + bool_idx = np.array([False, False, False, False, False, True]) + + # modify the value for the duplicate index entry + df.loc[trange[bool_idx], "A"] = 6 + + expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6], + 'B': [0, 0, 0, 0, 0, 0]}, + index=trange) + + tm.assert_frame_equal(df, expected) + + def test_modify_with_duplicate_index_adding(self): + """ see issue #17105 """ + + trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1), + end=pd.Timestamp(year=2017, month=1, day=5)) + + # insert a duplicate element to the index + trange = trange.insert(loc=5, + item=pd.Timestamp(year=2017, month=1, day=5)) + + df = pd.DataFrame(0, index=trange, columns=["A", "B"]) + bool_idx = np.array([False, False, False, False, False, True]) + + # modify the value for the duplicate index entry + df.loc[trange[bool_idx], "A"] += 7 + + expected = pd.DataFrame({'A': [0, 0, 0, 0, 7, 7], + 'B': [0, 0, 0, 0, 0, 0]}, + index=trange) + + tm.assert_frame_equal(df, expected) From 0a72675d1462328ed26caf3d68555348960c3e03 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 4 May 2018 06:06:19 -0400 Subject: [PATCH 2/3] clean up tests --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/tests/indexing/test_loc.py | 28 +++++----------------------- 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c0c4bcadf4644..aea4d0809d01d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1245,7 +1245,7 @@ Indexing - Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) - Bug in 
``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`) - Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) -- Bug in performing in-place operations on a DataFrame with a duplicate Index (:issue:`17105`) +- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 100233ab33d51..1aee283b7487c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -786,45 +786,27 @@ def convert_nested_indexer(indexer_type, keys): tm.assert_series_equal(result, expected) def test_modify_with_duplicate_index_assigning(self): - """ see issue #17105 """ + # gh-17105 + # insert a duplicate element to the index trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1), end=pd.Timestamp(year=2017, month=1, day=5)) - # insert a duplicate element to the index trange = trange.insert(loc=5, item=pd.Timestamp(year=2017, month=1, day=5)) df = pd.DataFrame(0, index=trange, columns=["A", "B"]) bool_idx = np.array([False, False, False, False, False, True]) - # modify the value for the duplicate index entry + # assignment df.loc[trange[bool_idx], "A"] = 6 expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6], 'B': [0, 0, 0, 0, 0, 0]}, index=trange) - tm.assert_frame_equal(df, expected) - def test_modify_with_duplicate_index_adding(self): - """ see issue #17105 """ - - trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1), - end=pd.Timestamp(year=2017, month=1, day=5)) - - # insert a duplicate element to the index - trange = trange.insert(loc=5, - item=pd.Timestamp(year=2017, month=1, day=5)) - + # in-place df = pd.DataFrame(0, index=trange, columns=["A", "B"]) - bool_idx = np.array([False, False, False, False, False, True]) - - # modify the value for the duplicate index entry - 
df.loc[trange[bool_idx], "A"] += 7 - - expected = pd.DataFrame({'A': [0, 0, 0, 0, 7, 7], - 'B': [0, 0, 0, 0, 0, 0]}, - index=trange) - + df.loc[trange[bool_idx], "A"] += 6 tm.assert_frame_equal(df, expected) From cf62ea31f6da0c735e1a9cfd7e6058288b6c33f5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 4 May 2018 06:07:33 -0400 Subject: [PATCH 3/3] move --- pandas/tests/frame/test_indexing.py | 26 ++++++++++++++++++++++++++ pandas/tests/indexing/test_loc.py | 26 -------------------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index b887b1c9f1218..6d74ce54faa94 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1929,6 +1929,32 @@ def test_iloc_duplicates(self): expected = df.take([0], axis=1) assert_frame_equal(result, expected) + def test_loc_duplicates(self): + # gh-17105 + + # insert a duplicate element to the index + trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1), + end=pd.Timestamp(year=2017, month=1, day=5)) + + trange = trange.insert(loc=5, + item=pd.Timestamp(year=2017, month=1, day=5)) + + df = pd.DataFrame(0, index=trange, columns=["A", "B"]) + bool_idx = np.array([False, False, False, False, False, True]) + + # assignment + df.loc[trange[bool_idx], "A"] = 6 + + expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6], + 'B': [0, 0, 0, 0, 0, 0]}, + index=trange) + tm.assert_frame_equal(df, expected) + + # in-place + df = pd.DataFrame(0, index=trange, columns=["A", "B"]) + df.loc[trange[bool_idx], "A"] += 6 + tm.assert_frame_equal(df, expected) + def test_iloc_sparse_propegate_fill_value(self): from pandas.core.sparse.api import SparseDataFrame df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 1aee283b7487c..6ccff7e898a6a 100644 --- a/pandas/tests/indexing/test_loc.py +++ 
b/pandas/tests/indexing/test_loc.py @@ -784,29 +784,3 @@ def convert_nested_indexer(indexer_type, keys): index=pd.MultiIndex.from_product(keys)) tm.assert_series_equal(result, expected) - - def test_modify_with_duplicate_index_assigning(self): - # gh-17105 - - # insert a duplicate element to the index - trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1), - end=pd.Timestamp(year=2017, month=1, day=5)) - - trange = trange.insert(loc=5, - item=pd.Timestamp(year=2017, month=1, day=5)) - - df = pd.DataFrame(0, index=trange, columns=["A", "B"]) - bool_idx = np.array([False, False, False, False, False, True]) - - # assignment - df.loc[trange[bool_idx], "A"] = 6 - - expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6], - 'B': [0, 0, 0, 0, 0, 0]}, - index=trange) - tm.assert_frame_equal(df, expected) - - # in-place - df = pd.DataFrame(0, index=trange, columns=["A", "B"]) - df.loc[trange[bool_idx], "A"] += 6 - tm.assert_frame_equal(df, expected)