Skip to content

Commit d3190d3

Browse files
Si Wei How (howsiwei)
Si Wei How
authored and committed
Fix assignment to multiple columns when some columns do not exist
1 parent 0a516c1 commit d3190d3

File tree

4 files changed

+90
-14
lines changed

4 files changed

+90
-14
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ Indexing
425425
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
426426
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`)
427427
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
428+
- Bug in assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist (:issue:`13658`)
428429

429430

430431
Missing

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3373,7 +3373,8 @@ def _setitem_array(self, key, value):
33733373
for k1, k2 in zip(key, value.columns):
33743374
self[k1] = value[k2]
33753375
else:
3376-
indexer = self.loc._convert_to_indexer(key, axis=1)
3376+
indexer = self.loc._convert_to_indexer(key, axis=1,
3377+
is_setter=True)
33773378
self._check_setitem_copy()
33783379
self.loc._setitem_with_indexer((slice(None), indexer), value)
33793380

pandas/core/indexing.py

+35-5
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,15 @@ def _setitem_with_indexer(self, indexer, value):
304304
# maybe partial set
305305
take_split_path = self.obj._is_mixed_type
306306

307+
has_missing_columns = (
308+
isinstance(indexer, tuple) and
309+
len(indexer) > info_axis and
310+
is_list_like_indexer(indexer[info_axis]) and
311+
any(isinstance(k, dict) for k in indexer[info_axis]))
312+
313+
if has_missing_columns:
314+
take_split_path = True
315+
307316
# if there is only one block/type, still have to take split path
308317
# unless the block is one-dimensional or it can hold the value
309318
if not take_split_path and self.obj._data.blocks:
@@ -466,14 +475,20 @@ def _setitem_with_indexer(self, indexer, value):
466475
if isinstance(value, ABCSeries):
467476
value = self._align_series(indexer, value)
468477

469-
info_idx = indexer[info_axis]
470-
if is_integer(info_idx):
471-
info_idx = [info_idx]
472-
labels = item_labels[info_idx]
478+
if has_missing_columns:
479+
labels = [idx['key'] if isinstance(idx, dict) else
480+
item_labels[idx]
481+
for idx in indexer[info_axis]]
482+
else:
483+
info_idx = indexer[info_axis]
484+
if is_integer(info_idx):
485+
info_idx = [info_idx]
486+
labels = item_labels[info_idx]
473487

474488
# if we have a partial multiindex, then need to adjust the plane
475489
# indexer here
476490
if (len(labels) == 1 and
491+
labels[0] in self.obj.columns and
477492
isinstance(self.obj[labels[0]].axes[0], MultiIndex)):
478493
item = labels[0]
479494
obj = self.obj[item]
@@ -517,7 +532,9 @@ def _setitem_with_indexer(self, indexer, value):
517532
lplane_indexer = 0
518533

519534
def setter(item, v):
520-
s = self.obj[item]
535+
is_missing_column = item not in self.obj.columns
536+
if not is_missing_column:
537+
s = self.obj[item]
521538
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
522539

523540
# perform the equivalent of a setitem on the info axis
@@ -529,8 +546,14 @@ def setter(item, v):
529546
all(com.is_null_slice(idx) or
530547
com.is_full_slice(idx, len(self.obj))
531548
for idx in pi)):
549+
if is_missing_column:
550+
self.obj[item] = v
551+
return
532552
s = v
533553
else:
554+
if is_missing_column:
555+
self[plane_indexer[0], item] = v
556+
return
534557
# set the item, possibly having a dtype change
535558
s._consolidate_inplace()
536559
s = s.copy()
@@ -1352,6 +1375,13 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False,
13521375
inds, = obj.nonzero()
13531376
return inds
13541377
else:
1378+
# allow missing columns when setting
1379+
if is_setter and axis == 1:
1380+
return [
1381+
self._convert_to_indexer(k, axis=axis,
1382+
is_setter=is_setter,
1383+
raise_missing=raise_missing)
1384+
for k in obj]
13551385
# When setting, missing keys are not allowed, even with .loc:
13561386
kwargs = {'raise_missing': True if is_setter else
13571387
raise_missing}

pandas/tests/frame/test_indexing.py

+52-8
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from pandas.tests.frame.common import TestData
1919
import pandas.util.testing as tm
2020
from pandas.util.testing import (
21-
assert_almost_equal, assert_frame_equal, assert_series_equal)
21+
assert_almost_equal, assert_frame_equal, assert_index_equal,
22+
assert_numpy_array_equal, assert_series_equal)
2223

2324
from pandas.tseries.offsets import BDay
2425

@@ -185,6 +186,51 @@ def test_setitem_list_of_tuples(self):
185186
expected = Series(tuples, index=self.frame.index, name='tuples')
186187
assert_series_equal(result, expected)
187188

189+
def test_setitem_list_all_missing_columns_scalar(self):
190+
df = self.frame.copy()
191+
df[['E', 'F']] = 1
192+
193+
result = df.columns
194+
expected = self.frame.columns.union(['E', 'F'])
195+
assert_index_equal(result, expected)
196+
197+
result = df[self.frame.columns]
198+
expected = self.frame
199+
assert_frame_equal(result, expected)
200+
201+
assert (df[['E', 'F']] == 1).all().all()
202+
203+
def test_setitem_list_some_missing_columns_list(self):
204+
df = self.frame.copy()
205+
df[['A', 'E']] = [1, 2]
206+
207+
result = df.columns
208+
expected = self.frame.columns.union(['E'])
209+
assert_index_equal(result, expected)
210+
211+
result = df[self.frame.columns.drop('A')]
212+
expected = self.frame.drop('A', axis=1)
213+
assert_frame_equal(result, expected)
214+
215+
assert (df['A'] == 1).all()
216+
assert (df['E'] == 2).all()
217+
218+
def test_setitem_list_some_missing_columns_dataframe(self):
219+
df = self.frame.copy()
220+
df[['A', 'E']] = self.frame[['B', 'C']]
221+
222+
result = df.columns
223+
expected = self.frame.columns.union(['E'])
224+
assert_index_equal(result, expected)
225+
226+
result = df[self.frame.columns.drop('A')]
227+
expected = self.frame.drop('A', axis=1)
228+
assert_frame_equal(result, expected)
229+
230+
result = df[['A', 'E']]
231+
expected = self.frame[['B', 'C']]
232+
assert_numpy_array_equal(result.to_numpy(), expected.to_numpy())
233+
188234
def test_setitem_mulit_index(self):
189235
# GH7655, test that assigning to a sub-frame of a frame
190236
# with multi-index columns aligns both rows and columns
@@ -454,9 +500,6 @@ def test_setitem(self):
454500
self.frame['col6'] = series
455501
tm.assert_series_equal(series, self.frame['col6'], check_names=False)
456502

457-
with pytest.raises(KeyError):
458-
self.frame[np.random.randn(len(self.frame) + 1)] = 1
459-
460503
# set ndarray
461504
arr = np.random.randn(len(self.frame))
462505
self.frame['col9'] = arr
@@ -1093,10 +1136,11 @@ def test_fancy_index_int_labels_exceptions(self):
10931136
r" dtype='object'\)\] are in the \[index\]")
10941137
with pytest.raises(KeyError, match=msg):
10951138
self.frame.ix[['foo', 'bar', 'baz']] = 1
1096-
msg = (r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1097-
r" \[columns\]")
1098-
with pytest.raises(KeyError, match=msg):
1099-
self.frame.ix[:, ['E']] = 1
1139+
# partial setting now allows this GH13658
1140+
# msg = (r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1141+
# r" \[columns\]")
1142+
# with pytest.raises(KeyError, match=msg):
1143+
# self.frame.ix[:, ['E']] = 1
11001144

11011145
# partial setting now allows this GH2578
11021146
# pytest.raises(KeyError, self.frame.ix.__setitem__,

0 commit comments

Comments
 (0)