From 64101f85a354345b9de94ac5c976d467ebcedd0b Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 19 Jan 2021 21:36:10 +0100 Subject: [PATCH 1/4] BUG: loc.setitem raising ValueError when df has duplicate columns --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/indexing.py | 3 ++- pandas/tests/frame/indexing/test_setitem.py | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index ab00b749d5725..c419884ee6ce1 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -276,6 +276,7 @@ Indexing - Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` with timezone aware indexes raising ``TypeError`` for ``method="ffill"`` and ``method="bfill"`` and specified ``tolerance`` (:issue:`38566`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` with empty :class:`DataFrame` and specified columns for string indexer and non empty :class:`DataFrame` to set (:issue:`38831`) +- Bug in :meth:`DataFrame.loc.__setitem__` raising ``ValueError`` when expanding unique column for :class:`DataFrame` with duplicate columns (:issue:`38521`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) - Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 90ba03b312e56..ac44d8cc7d823 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -29,6 +29,7 @@ from 
pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries from pandas.core.dtypes.missing import infer_fill_value, isna +import pandas.core.algorithms as algos import pandas.core.common as com from pandas.core.construction import array as pd_array from pandas.core.indexers import ( @@ -1850,7 +1851,7 @@ def _setitem_single_block(self, indexer, value, name: str): for i, idx in enumerate(indexer) if i != info_axis ) - and item_labels.is_unique + and algos.unique(item_labels[indexer[info_axis]]) ): self.obj[item_labels[indexer[info_axis]]] = value return diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 295c8a27e6ddd..e249da57e33be 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -373,6 +373,15 @@ def test_setitem_string_column_numpy_dtype_raising(self): expected = DataFrame([[1, 2, 5], [3, 4, 6]], columns=[0, 1, "0 - Name"]) tm.assert_frame_equal(df, expected) + def test_setitem_empty_df_duplicate_columns(self): + # GH#38521 + df = DataFrame(columns=["a", "b", "b"], dtype="float64") + df.loc[:, "a"] = list(range(2)) + expected = DataFrame( + [[0, np.nan, np.nan], [1, np.nan, np.nan]], columns=["a", "b", "b"] + ) + tm.assert_frame_equal(df, expected) + class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self): From 6c9f2451e34212f2d7fdaed0107941f32c936f3e Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 19 Jan 2021 22:10:00 +0100 Subject: [PATCH 2/4] Use is_scalar instead of unique --- pandas/core/indexing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ac44d8cc7d823..eeaa1a621bbca 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -29,7 +29,6 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries from pandas.core.dtypes.missing import infer_fill_value, isna -import pandas.core.algorithms as algos 
import pandas.core.common as com from pandas.core.construction import array as pd_array from pandas.core.indexers import ( @@ -1851,7 +1850,7 @@ def _setitem_single_block(self, indexer, value, name: str): for i, idx in enumerate(indexer) if i != info_axis ) - and algos.unique(item_labels[indexer[info_axis]]) + and is_scalar(item_labels[indexer[info_axis]]) ): self.obj[item_labels[indexer[info_axis]]] = value return From 9bc5fc9a10beef94cd85f095f252cacc613a68a2 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 19 Jan 2021 23:02:40 +0100 Subject: [PATCH 3/4] Change condition --- pandas/core/indexing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index eeaa1a621bbca..65100232d94f1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1850,7 +1850,8 @@ def _setitem_single_block(self, indexer, value, name: str): for i, idx in enumerate(indexer) if i != info_axis ) - and is_scalar(item_labels[indexer[info_axis]]) + and len(item_labels.get_indexer_for([item_labels[indexer[info_axis]]])) + == 1 ): self.obj[item_labels[indexer[info_axis]]] = value return From 74c1dd08fbc4ee208241ddec12f1d866dad0636d Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 20 Jan 2021 01:06:33 +0100 Subject: [PATCH 4/4] Make nested condition --- pandas/core/indexing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 65100232d94f1..1402354c9b2ca 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1850,11 +1850,11 @@ def _setitem_single_block(self, indexer, value, name: str): for i, idx in enumerate(indexer) if i != info_axis ) - and len(item_labels.get_indexer_for([item_labels[indexer[info_axis]]])) - == 1 ): - self.obj[item_labels[indexer[info_axis]]] = value - return + selected_item_labels = item_labels[indexer[info_axis]] + if len(item_labels.get_indexer_for([selected_item_labels])) == 1: + 
self.obj[selected_item_labels] = value + return indexer = maybe_convert_ix(*indexer) if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):