-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: assignment to multiple columns when some column do not exist #26534
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -3007,6 +3007,12 @@ def _setitem_array(self, key, value): | |||||||||||||
for k1, k2 in zip(key, value.columns): | ||||||||||||||
self[k1] = value[k2] | ||||||||||||||
else: | ||||||||||||||
if all(is_hashable(k) for k in key): | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Under what cases can elements of I'm a bit concerned about the performance here, for the case when There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only reason that I check whether pandas/pandas/tests/indexing/test_indexing.py Lines 180 to 184 in e55b698
If producing all these errors are not required then this check can be omitted. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TomAugspurger what's your thought on this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if the check is removed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The error produced is different. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the error? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea this seems strange to me. I feel like this should be hashable inherently so clarification here would be helpful There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Without testing pandas/pandas/tests/indexing/test_indexing.py Line 184 in 3c0cf22
Buffer has wrong number of dimensions (expected 1, got 2) or Array conditional must be same shape as self instead of the expected errors.
|
||||||||||||||
for k in key: | ||||||||||||||
try: | ||||||||||||||
self[k] | ||||||||||||||
except KeyError: | ||||||||||||||
self[k] = np.nan | ||||||||||||||
indexer = self.loc._get_listlike_indexer( | ||||||||||||||
key, axis=1, raise_missing=False | ||||||||||||||
)[1] | ||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
from pandas.core.dtypes.common import ( | ||
ensure_platform_int, | ||
is_float, | ||
is_hashable, | ||
is_integer, | ||
is_integer_dtype, | ||
is_iterator, | ||
|
@@ -197,6 +198,19 @@ def _get_setitem_indexer(self, key): | |
def __setitem__(self, key, value): | ||
if isinstance(key, tuple): | ||
key = tuple(com.apply_if_callable(x, self.obj) for x in key) | ||
if ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe this gets simplified if the |
||
self.name == "loc" | ||
and len(key) > 1 | ||
and is_list_like_indexer(key[1]) | ||
and not isinstance(key[1], tuple) | ||
and not com.is_bool_indexer(key[1]) | ||
and all(is_hashable(k) for k in key[1]) | ||
): | ||
for k in key[1]: | ||
try: | ||
self.obj[k] | ||
except KeyError: | ||
self.obj[k] = np.nan | ||
else: | ||
key = com.apply_if_callable(key, self.obj) | ||
indexer = self._get_setitem_indexer(key) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -208,6 +208,29 @@ def test_setitem_list_of_tuples(self, float_frame): | |
expected = Series(tuples, index=float_frame.index, name="tuples") | ||
assert_series_equal(result, expected) | ||
|
||
@pytest.mark.parametrize("columns", [["A", "E"], ["E", "F"]]) | ||
@pytest.mark.parametrize( | ||
"box", | ||
[ | ||
lambda x: 1, | ||
lambda x: [1, 2], | ||
lambda x: np.array([1, 2]), | ||
lambda x: x[["B", "C"]], | ||
lambda x: x[["B", "A"]].values, | ||
lambda x: x[["A", "C"]].values.tolist(), | ||
], | ||
) | ||
def test_setitem_list_missing_columns(self, float_frame, columns, box): | ||
# GH 26534 | ||
result = float_frame.copy() | ||
result[columns] = box(float_frame) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm not really sure what the point of this parametrization is - can you construct test(s) that don't overwrite the existing column data before the assignment? Seems unrelated unless I am missing something There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I parametrize the tests as #26534 (comment) suggest. What do you propose? |
||
expected = float_frame.copy() | ||
for col in columns: | ||
if col not in expected.columns: | ||
expected[col] = np.nan | ||
expected[columns] = box(float_frame) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_setitem_mulit_index(self): | ||
# GH7655, test that assigning to a sub-frame of a frame | ||
# with multi-index columns aligns both rows and columns | ||
|
@@ -501,13 +524,6 @@ def test_setitem(self, float_frame): | |
float_frame["col6"] = series | ||
tm.assert_series_equal(series, float_frame["col6"], check_names=False) | ||
|
||
msg = ( | ||
r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the" | ||
r" \[columns\]\"" | ||
) | ||
with pytest.raises(KeyError, match=msg): | ||
float_frame[np.random.randn(len(float_frame) + 1)] = 1 | ||
|
||
# set ndarray | ||
arr = np.random.randn(len(float_frame)) | ||
float_frame["col9"] = arr | ||
|
@@ -1143,17 +1159,6 @@ def test_fancy_index_int_labels_exceptions(self, float_frame): | |
) | ||
with pytest.raises(KeyError, match=msg): | ||
float_frame.ix[["foo", "bar", "baz"]] = 1 | ||
msg = ( | ||
r"None of \[Index\(\['E'\], dtype='object'\)\] are in the" | ||
r" \[columns\]" | ||
) | ||
with pytest.raises(KeyError, match=msg): | ||
float_frame.ix[:, ["E"]] = 1 | ||
|
||
# FIXME: don't leave commented-out | ||
# partial setting now allows this GH2578 | ||
# pytest.raises(KeyError, float_frame.ix.__setitem__, | ||
# (slice(None, None), 'E'), 1) | ||
|
||
def test_setitem_fancy_mixed_2d(self, float_string_frame): | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.