Skip to content

Commit 6cc0885

Browse files
author
Si Wei How
committed
Fix assignment to multiple columns when some columns do not exist
1 parent 0a516c1 commit 6cc0885

File tree

4 files changed

+77
-18
lines changed

4 files changed

+77
-18
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ Indexing
425425
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
426426
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`)
427427
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
428+
- Bug in assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist (:issue:`13658`)
428429

429430

430431
Missing

pandas/core/frame.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -3371,11 +3371,13 @@ def _setitem_array(self, key, value):
33713371
if len(value.columns) != len(key):
33723372
raise ValueError('Columns must be same length as key')
33733373
for k1, k2 in zip(key, value.columns):
3374-
self[k1] = value[k2]
3374+
self._set_item(k1, value[k2])
33753375
else:
3376-
indexer = self.loc._convert_to_indexer(key, axis=1)
3376+
indexer = self.loc._convert_to_indexer(key, axis=1,
3377+
is_setter=True)
33773378
self._check_setitem_copy()
3378-
self.loc._setitem_with_indexer((slice(None), indexer), value)
3379+
self.loc._setitem_with_indexer((slice(None), indexer), value,
3380+
columns=key)
33793381

33803382
def _setitem_frame(self, key, value):
33813383
# support boolean setting with DataFrame input, e.g.

pandas/core/indexing.py

+26-11
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def _has_valid_positional_setitem_indexer(self, indexer):
294294

295295
return True
296296

297-
def _setitem_with_indexer(self, indexer, value):
297+
def _setitem_with_indexer(self, indexer, value, columns=None):
298298
self._has_valid_setitem_indexer(indexer)
299299

300300
# also has the side effect of consolidating in-place
@@ -304,6 +304,11 @@ def _setitem_with_indexer(self, indexer, value):
304304
# maybe partial set
305305
take_split_path = self.obj._is_mixed_type
306306

307+
# if not all columns are present
308+
if (columns is not None and
309+
any(k not in self.obj.columns for k in columns)):
310+
take_split_path = True
311+
307312
# if there is only one block/type, still have to take split path
308313
# unless the block is one-dimensional or it can hold the value
309314
if not take_split_path and self.obj._data.blocks:
@@ -466,10 +471,13 @@ def _setitem_with_indexer(self, indexer, value):
466471
if isinstance(value, ABCSeries):
467472
value = self._align_series(indexer, value)
468473

469-
info_idx = indexer[info_axis]
470-
if is_integer(info_idx):
471-
info_idx = [info_idx]
472-
labels = item_labels[info_idx]
474+
if columns is None:
475+
info_idx = indexer[info_axis]
476+
if is_integer(info_idx):
477+
info_idx = [info_idx]
478+
labels = item_labels[info_idx]
479+
else:
480+
labels = columns
473481

474482
# if we have a partial multiindex, then need to adjust the plane
475483
# indexer here
@@ -517,6 +525,9 @@ def _setitem_with_indexer(self, indexer, value):
517525
lplane_indexer = 0
518526

519527
def setter(item, v):
528+
if item not in self.obj.columns:
529+
self.obj[item] = v
530+
return
520531
s = self.obj[item]
521532
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
522533

@@ -1109,7 +1120,8 @@ def _getitem_axis(self, key, axis=None):
11091120

11101121
return self._get_label(key, axis=axis)
11111122

1112-
def _get_listlike_indexer(self, key, axis, raise_missing=False):
1123+
def _get_listlike_indexer(self, key, axis, is_setter=False,
1124+
raise_missing=False):
11131125
"""
11141126
Transform a list-like of keys into a new index and an indexer.
11151127
@@ -1162,6 +1174,7 @@ def _get_listlike_indexer(self, key, axis, raise_missing=False):
11621174

11631175
self._validate_read_indexer(keyarr, indexer,
11641176
o._get_axis_number(axis),
1177+
is_setter=is_setter,
11651178
raise_missing=raise_missing)
11661179
return keyarr, indexer
11671180

@@ -1210,7 +1223,8 @@ def _getitem_iterable(self, key, axis=None):
12101223
return self.obj._reindex_with_indexers({axis: [keyarr, indexer]},
12111224
copy=True, allow_dups=True)
12121225

1213-
def _validate_read_indexer(self, key, indexer, axis, raise_missing=False):
1226+
def _validate_read_indexer(self, key, indexer, axis, is_setter=False,
1227+
raise_missing=False):
12141228
"""
12151229
Check that indexer can be used to return a result (e.g. at least one
12161230
element was found, unless the list of keys was actually empty).
@@ -1244,7 +1258,8 @@ def _validate_read_indexer(self, key, indexer, axis, raise_missing=False):
12441258
missing = (indexer < 0).sum()
12451259

12461260
if missing:
1247-
if missing == len(indexer):
1261+
if (not(self.name == 'loc' and is_setter) and
1262+
missing == len(indexer)):
12481263
raise KeyError(
12491264
"None of [{key}] are in the [{axis}]".format(
12501265
key=key, axis=self.obj._get_axis_name(axis)))
@@ -1268,7 +1283,7 @@ def _validate_read_indexer(self, key, indexer, axis, raise_missing=False):
12681283
See the documentation here:
12691284
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa
12701285

1271-
if not (ax.is_categorical() or ax.is_interval()):
1286+
if not is_setter and not (ax.is_categorical() or ax.is_interval()):
12721287
warnings.warn(_missing_key_warning,
12731288
FutureWarning, stacklevel=6)
12741289

@@ -1353,8 +1368,8 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False,
13531368
return inds
13541369
else:
13551370
# Forward is_setter so validation can allow missing keys when setting via .loc:
1356-
kwargs = {'raise_missing': True if is_setter else
1357-
raise_missing}
1371+
kwargs = {'is_setter': is_setter,
1372+
'raise_missing': raise_missing}
13581373
return self._get_listlike_indexer(obj, axis, **kwargs)[1]
13591374
else:
13601375
try:

pandas/tests/frame/test_indexing.py

+45-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from pandas.tests.frame.common import TestData
1919
import pandas.util.testing as tm
2020
from pandas.util.testing import (
21-
assert_almost_equal, assert_frame_equal, assert_series_equal)
21+
assert_almost_equal, assert_frame_equal, assert_index_equal,
22+
assert_numpy_array_equal, assert_series_equal)
2223

2324
from pandas.tseries.offsets import BDay
2425

@@ -185,6 +186,49 @@ def test_setitem_list_of_tuples(self):
185186
expected = Series(tuples, index=self.frame.index, name='tuples')
186187
assert_series_equal(result, expected)
187188

189+
def test_setitem_list_missing_columns(self):
190+
df = self.frame.copy()
191+
df[['E', 'F']] = 1
192+
193+
result = df.columns
194+
expected = self.frame.columns.union(['E', 'F'])
195+
assert_index_equal(result, expected)
196+
197+
result = df[self.frame.columns]
198+
expected = self.frame
199+
assert_frame_equal(result, expected)
200+
201+
assert (df[['E', 'F']] == 1).all().all()
202+
203+
df = self.frame.copy()
204+
df[['A', 'E']] = [1, 2]
205+
206+
result = df.columns
207+
expected = self.frame.columns.union(['E'])
208+
assert_index_equal(result, expected)
209+
210+
result = df[self.frame.columns.drop('A')]
211+
expected = self.frame.drop('A', axis=1)
212+
assert_frame_equal(result, expected)
213+
214+
assert (df['A'] == 1).all()
215+
assert (df['E'] == 2).all()
216+
217+
df = self.frame.copy()
218+
df[['A', 'E']] = self.frame[['B', 'C']]
219+
220+
result = df.columns
221+
expected = self.frame.columns.union(['E'])
222+
assert_index_equal(result, expected)
223+
224+
result = df[self.frame.columns.drop('A')]
225+
expected = self.frame.drop('A', axis=1)
226+
assert_frame_equal(result, expected)
227+
228+
result = df[['A', 'E']]
229+
expected = self.frame[['B', 'C']]
230+
assert_numpy_array_equal(result.to_numpy(), expected.to_numpy())
231+
188232
def test_setitem_mulit_index(self):
189233
# GH7655, test that assigning to a sub-frame of a frame
190234
# with multi-index columns aligns both rows and columns
@@ -454,9 +498,6 @@ def test_setitem(self):
454498
self.frame['col6'] = series
455499
tm.assert_series_equal(series, self.frame['col6'], check_names=False)
456500

457-
with pytest.raises(KeyError):
458-
self.frame[np.random.randn(len(self.frame) + 1)] = 1
459-
460501
# set ndarray
461502
arr = np.random.randn(len(self.frame))
462503
self.frame['col9'] = arr

0 commit comments

Comments
 (0)