diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 2346a86ad21f8..4a86194356a71 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -810,6 +810,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`) - Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`) +- Bug in :meth:`DataFrame.__getitem__` returning copy when :class:`DataFrame` has duplicated columns even if a unique column is selected (:issue:`45316`, :issue:`41062`) - Bug in :meth:`Series.align` does not create :class:`MultiIndex` with union of levels when both MultiIndexes intersections are identical (:issue:`45224`) - Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`) - Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8711d53353185..920358f206770 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3612,11 +3612,18 @@ def __getitem__(self, key): if is_hashable(key) and not is_iterator(key): # is_iterator to exclude generator e.g. test_getitem_listlike # shortcut if the key is in columns - if self.columns.is_unique and key in self.columns: - if isinstance(self.columns, MultiIndex): - return self._getitem_multilevel(key) + is_mi = isinstance(self.columns, MultiIndex) + # GH#45316 Return view if key is not duplicated + # Only use drop_duplicates with duplicates for performance + if not is_mi and ( + self.columns.is_unique + and key in self.columns + or key in self.columns.drop_duplicates(keep=False) + ): return self._get_item_cache(key) + elif is_mi and self.columns.is_unique and key in self.columns: + return self._getitem_multilevel(key) # Do we have a slicer (on rows)? indexer = convert_to_index_sliceable(self, key) if indexer is not None: diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 8cc8b487ff44f..7994c56f8d68b 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -357,6 +357,21 @@ def test_getitem_empty_frame_with_boolean(self): df2 = df[df > 0] tm.assert_frame_equal(df, df2) + def test_getitem_returns_view_when_column_is_unique_in_df(self): + # GH#45316 + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + view = df["b"] + view.loc[:] = 100 + expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"]) + tm.assert_frame_equal(df, expected) + + def test_getitem_frozenset_unique_in_column(self): + # GH#41062 + df = DataFrame([[1, 2, 3, 4]], columns=[frozenset(["KEY"]), "B", "C", "C"]) + result = df[frozenset(["KEY"])] + expected = Series([1], name=frozenset(["KEY"])) + tm.assert_series_equal(result, expected) + class TestGetitemSlice: def test_getitem_slice_float64(self, frame_or_series): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 47f929d87bd6f..adc001695579c 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -424,19 +424,6 @@ def test_detect_chained_assignment_warnings_errors(self): with pytest.raises(SettingWithCopyError, match=msg): df.loc[0]["A"] = 111 - def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self): - # xref gh-13017. - with option_context("chained_assignment", "warn"): - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]) - - with tm.assert_produces_warning(SettingWithCopyWarning): - df.c.loc[df.c > 0] = None - - expected = DataFrame( - [[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"] - ) - tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})]) def test_detect_chained_assignment_warning_stacklevel(self, rhs): # GH#42570