Skip to content

BUG: df.__getitem__ returning copy instead of view for unique column in dup index #45526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jun 22, 2022
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,7 @@ Indexing
^^^^^^^^
- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`)
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
- Bug in :meth:`DataFrame.__getitem__` returning a copy when :class:`DataFrame` has duplicated columns even if a unique column is selected (:issue:`45316`, :issue:`41062`)
- Bug in :meth:`Series.align` not creating a :class:`MultiIndex` with the union of levels when both MultiIndexes' intersections are identical (:issue:`45224`)
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`)
Expand Down
13 changes: 10 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3612,11 +3612,18 @@ def __getitem__(self, key):
if is_hashable(key) and not is_iterator(key):
# is_iterator to exclude generator e.g. test_getitem_listlike
# shortcut if the key is in columns
if self.columns.is_unique and key in self.columns:
if isinstance(self.columns, MultiIndex):
return self._getitem_multilevel(key)
is_mi = isinstance(self.columns, MultiIndex)
# GH#45316 Return view if key is not duplicated
# Only use drop_duplicates with duplicates for performance
if not is_mi and (
self.columns.is_unique
and key in self.columns
or key in self.columns.drop_duplicates(keep=False)
):
return self._get_item_cache(key)

elif is_mi and self.columns.is_unique and key in self.columns:
return self._getitem_multilevel(key)
# Do we have a slicer (on rows)?
indexer = convert_to_index_sliceable(self, key)
if indexer is not None:
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/frame/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,21 @@ def test_getitem_empty_frame_with_boolean(self):
df2 = df[df > 0]
tm.assert_frame_equal(df, df2)

def test_getitem_returns_view_when_column_is_unique_in_df(self):
# GH#45316
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
view = df["b"]
view.loc[:] = 100
expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"])
tm.assert_frame_equal(df, expected)

def test_getitem_frozenset_unique_in_column(self):
# GH#41062
df = DataFrame([[1, 2, 3, 4]], columns=[frozenset(["KEY"]), "B", "C", "C"])
result = df[frozenset(["KEY"])]
expected = Series([1], name=frozenset(["KEY"]))
tm.assert_series_equal(result, expected)


class TestGetitemSlice:
def test_getitem_slice_float64(self, frame_or_series):
Expand Down
13 changes: 0 additions & 13 deletions pandas/tests/indexing/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,19 +424,6 @@ def test_detect_chained_assignment_warnings_errors(self):
with pytest.raises(SettingWithCopyError, match=msg):
df.loc[0]["A"] = 111

def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
# Chained assignment through a column of a frame whose column labels contain
# a duplicate ("a", "a", "c") is expected to warn and leave the frame unchanged.
# NOTE(review): this hunk is listed as "0 additions & 13 deletions" in the
# diff, i.e. the test is being removed rather than added.
# xref gh-13017.
# Run with the "chained_assignment" option set to "warn" for this block only.
with option_context("chained_assignment", "warn"):
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"])

# Attribute access (df.c) followed by a masked .loc assignment is the
# chained form under test; it must emit SettingWithCopyWarning.
with tm.assert_produces_warning(SettingWithCopyWarning):
df.c.loc[df.c > 0] = None

# `expected` repeats the data the frame was constructed with: the
# assignment above must not have been written into `df`.
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]
)
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
def test_detect_chained_assignment_warning_stacklevel(self, rhs):
# GH#42570
Expand Down