-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF/API: DataFrame.__setitem__ never operate in-place #39510
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
a5c1f5e
15f5265
73302c0
ceeeb78
6b304b8
fe9be7e
1789fca
6400be3
a1cdd19
870ae37
c32a427
d9d8beb
8a283d9
9718f6e
9f6d8e7
dd53abc
f6b09b4
c107e77
ae3ae1b
c631079
4cfe863
03e6eb9
d280f60
9db5537
4054aea
bd76479
7ea792f
20f6a16
fdbf6f3
2c6da5d
2d0cf9e
e1ed083
6896e86
1af8686
c8b8e23
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3257,6 +3257,7 @@ def _setitem_slice(self, key: slice, value): | |
def _setitem_array(self, key, value): | ||
# also raises Exception if object array with NA values | ||
if com.is_bool_indexer(key): | ||
# bool indexer is indexing along rows | ||
if len(key) != len(self.index): | ||
raise ValueError( | ||
f"Item wrong length {len(key)} instead of {len(self.index)}!" | ||
|
@@ -3265,18 +3266,66 @@ def _setitem_array(self, key, value): | |
indexer = key.nonzero()[0] | ||
self._check_setitem_copy() | ||
self.iloc[indexer] = value | ||
|
||
else: | ||
if isinstance(value, DataFrame): | ||
check_key_length(self.columns, key, value) | ||
for k1, k2 in zip(key, value.columns): | ||
self[k1] = value[k2] | ||
|
||
elif not is_list_like(value): | ||
for col in key: | ||
self[col] = value | ||
|
||
elif isinstance(value, np.ndarray) and value.ndim == 2: | ||
self._iset_not_inplace(key, value) | ||
|
||
elif np.ndim(value) > 1: | ||
# list of lists | ||
value = DataFrame(value).values | ||
return self._setitem_array(key, value) | ||
|
||
else: | ||
self.loc._ensure_listlike_indexer(key, axis=1, value=value) | ||
indexer = self.loc._get_listlike_indexer( | ||
key, axis=1, raise_missing=False | ||
)[1] | ||
self._check_setitem_copy() | ||
self.iloc[:, indexer] = value | ||
self._iset_not_inplace(key, value) | ||
|
||
def _iset_not_inplace(self, key, value): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a comment (or informal docstring) here stating briefly which cases this covers? (eg I think only for listlike key and value?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good idea, will do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated, LMK if you find this informative I think comments are addressed, so this is ready for another look (once the CI runs) |
||
def igetitem(obj, i: int): | ||
# Note: we catch DataFrame obj before getting here, but | ||
# hypothetically would return obj.iloc[:, i] | ||
if isinstance(obj, np.ndarray): | ||
return obj[..., i] | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in the future would be really nice to move lots of this elsewhere (e.g. indexing.py or similar) |
||
return obj[i] | ||
|
||
if self.columns.is_unique: | ||
if np.shape(value)[-1] != len(key): | ||
raise ValueError("Columns must be same length as key") | ||
|
||
for i, col in enumerate(key): | ||
self[col] = igetitem(value, i) | ||
|
||
else: | ||
|
||
ilocs = self.columns.get_indexer_non_unique(key)[0] | ||
if (ilocs < 0).any(): | ||
# key entries not in self.columns | ||
raise NotImplementedError | ||
|
||
if np.shape(value)[-1] != len(ilocs): | ||
raise ValueError("Columns must be same length as key") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are all of the branches covered? this is a lot of new code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not the NotImplementedError on L3276; not sure what to do there There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but the rest is covered? what about the previous paths this was taking, can they be removed now? (e.g. they are uncovered) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated with a test that hits the two |
||
|
||
assert np.ndim(value) <= 2 | ||
|
||
orig_columns = self.columns | ||
|
||
# Using self.iloc[:, i] = ... may set values inplace, which | ||
# by convention we do not do in __setitem__ | ||
try: | ||
self.columns = Index(range(len(self.columns))) | ||
for i, iloc in enumerate(ilocs): | ||
self[iloc] = igetitem(value, i) | ||
finally: | ||
self.columns = orig_columns | ||
|
||
def _setitem_frame(self, key, value): | ||
# support boolean setting with DataFrame input, e.g. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -334,6 +334,10 @@ def test_setitem_complete_column_with_array(self): | |
"d": [1, 1, 1], | ||
} | ||
) | ||
expected["c"] = expected["c"].astype(arr.dtype) | ||
expected["d"] = expected["d"].astype(arr.dtype) | ||
assert expected["c"].dtype == arr.dtype | ||
assert expected["d"].dtype == arr.dtype | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What changed here? (I wouldn't expect the above test to change behaviour related to this PR, as it's adding new columns) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIRC this was needed for the 32bit builds |
||
tm.assert_frame_equal(df, expected) | ||
|
||
@pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) | ||
|
@@ -373,11 +377,24 @@ def test_setitem_frame_duplicate_columns(self): | |
[np.nan, 1, 2, np.nan, 4, 5], | ||
[np.nan, 1, 2, np.nan, 4, 5], | ||
], | ||
columns=cols, | ||
dtype="object", | ||
) | ||
expected[2] = expected[2].astype(np.int64) | ||
expected[5] = expected[5].astype(np.int64) | ||
expected.columns = cols | ||
tm.assert_frame_equal(df, expected) | ||
|
||
def test_setitem_frame_duplicate_columns_size_mismatch(self): | ||
# GH#39510 | ||
cols = ["A", "B", "C"] * 2 | ||
df = DataFrame(index=range(3), columns=cols) | ||
with pytest.raises(ValueError, match="Columns must be same length as key"): | ||
df[["A"]] = (0, 3, 5) | ||
|
||
df2 = df.iloc[:, :3] # unique columns | ||
with pytest.raises(ValueError, match="Columns must be same length as key"): | ||
df2[["A"]] = (0, 3, 5) | ||
|
||
@pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]]) | ||
def test_setitem_df_wrong_column_number(self, cols): | ||
# GH#38604 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
really like to move this stuff out of here into another module, but i guess that's for another day