Skip to content

Commit dcf041b

Browse files
authored
BUG: df.getitem returning copy instead of view for unique column in dup index (#45526)
1 parent 10967ce commit dcf041b

File tree

4 files changed

+26
-16
lines changed

4 files changed

+26
-16
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,7 @@ Indexing
810810
^^^^^^^^
811811
- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` and ``frame[y].loc[x]`` (:issue:`22372`)
812812
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
813+
- Bug in :meth:`DataFrame.__getitem__` returning a copy when the :class:`DataFrame` has duplicated columns, even if a unique column is selected (:issue:`45316`, :issue:`41062`)
813814
- Bug in :meth:`Series.align` not creating a :class:`MultiIndex` with the union of levels when both MultiIndexes' intersections are identical (:issue:`45224`)
814815
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
815816
- Bug when setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values``, where ``values`` has the same dtype as ``df.iloc[:, i]``, incorrectly inserting a new array instead of setting in-place (:issue:`33457`)

pandas/core/frame.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -3612,11 +3612,18 @@ def __getitem__(self, key):
36123612
if is_hashable(key) and not is_iterator(key):
36133613
# is_iterator to exclude generator e.g. test_getitem_listlike
36143614
# shortcut if the key is in columns
3615-
if self.columns.is_unique and key in self.columns:
3616-
if isinstance(self.columns, MultiIndex):
3617-
return self._getitem_multilevel(key)
3615+
is_mi = isinstance(self.columns, MultiIndex)
3616+
# GH#45316 Return view if key is not duplicated
3617+
# Only use drop_duplicates with duplicates for performance
3618+
if not is_mi and (
3619+
self.columns.is_unique
3620+
and key in self.columns
3621+
or key in self.columns.drop_duplicates(keep=False)
3622+
):
36183623
return self._get_item_cache(key)
36193624

3625+
elif is_mi and self.columns.is_unique and key in self.columns:
3626+
return self._getitem_multilevel(key)
36203627
# Do we have a slicer (on rows)?
36213628
indexer = convert_to_index_sliceable(self, key)
36223629
if indexer is not None:

pandas/tests/frame/indexing/test_getitem.py

+15
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,21 @@ def test_getitem_empty_frame_with_boolean(self):
357357
df2 = df[df > 0]
358358
tm.assert_frame_equal(df, df2)
359359

360+
def test_getitem_returns_view_when_column_is_unique_in_df(self):
361+
# GH#45316
362+
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
363+
view = df["b"]
364+
view.loc[:] = 100
365+
expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"])
366+
tm.assert_frame_equal(df, expected)
367+
368+
def test_getitem_frozenset_unique_in_column(self):
369+
# GH#41062
370+
df = DataFrame([[1, 2, 3, 4]], columns=[frozenset(["KEY"]), "B", "C", "C"])
371+
result = df[frozenset(["KEY"])]
372+
expected = Series([1], name=frozenset(["KEY"]))
373+
tm.assert_series_equal(result, expected)
374+
360375

361376
class TestGetitemSlice:
362377
def test_getitem_slice_float64(self, frame_or_series):

pandas/tests/indexing/test_chaining_and_caching.py

-13
Original file line numberDiff line numberDiff line change
@@ -424,19 +424,6 @@ def test_detect_chained_assignment_warnings_errors(self):
424424
with pytest.raises(SettingWithCopyError, match=msg):
425425
df.loc[0]["A"] = 111
426426

427-
def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
428-
# xref gh-13017.
429-
with option_context("chained_assignment", "warn"):
430-
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"])
431-
432-
with tm.assert_produces_warning(SettingWithCopyWarning):
433-
df.c.loc[df.c > 0] = None
434-
435-
expected = DataFrame(
436-
[[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]
437-
)
438-
tm.assert_frame_equal(df, expected)
439-
440427
@pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
441428
def test_detect_chained_assignment_warning_stacklevel(self, rhs):
442429
# GH#42570

0 commit comments

Comments
 (0)