Skip to content

Commit 98de71c

Browse files
Si Wei How (howsiwei)
Si Wei How
authored and committed
Fix assignment to multiple columns when some columns do not exist
1 parent 0a516c1 commit 98de71c

File tree

4 files changed

+89
-14
lines changed

4 files changed

+89
-14
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ Indexing
425425
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
426426
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`)
427427
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
428+
- Bug in assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist (:issue:`13658`)
428429

429430

430431
Missing

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3373,7 +3373,8 @@ def _setitem_array(self, key, value):
33733373
for k1, k2 in zip(key, value.columns):
33743374
self[k1] = value[k2]
33753375
else:
3376-
indexer = self.loc._convert_to_indexer(key, axis=1)
3376+
indexer = self.loc._convert_to_indexer(key, axis=1,
3377+
is_setter=True)
33773378
self._check_setitem_copy()
33783379
self.loc._setitem_with_indexer((slice(None), indexer), value)
33793380

pandas/core/indexing.py

+34-5
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,15 @@ def _setitem_with_indexer(self, indexer, value):
304304
# maybe partial set
305305
take_split_path = self.obj._is_mixed_type
306306

307+
has_missing_columns = (
308+
isinstance(indexer, tuple) and
309+
len(indexer) > info_axis and
310+
is_list_like_indexer(indexer[info_axis]) and
311+
any(isinstance(k, dict) for k in indexer[info_axis]))
312+
313+
if has_missing_columns:
314+
take_split_path = True
315+
307316
# if there is only one block/type, still have to take split path
308317
# unless the block is one-dimensional or it can hold the value
309318
if not take_split_path and self.obj._data.blocks:
@@ -466,14 +475,19 @@ def _setitem_with_indexer(self, indexer, value):
466475
if isinstance(value, ABCSeries):
467476
value = self._align_series(indexer, value)
468477

469-
info_idx = indexer[info_axis]
470-
if is_integer(info_idx):
471-
info_idx = [info_idx]
472-
labels = item_labels[info_idx]
478+
if has_missing_columns:
479+
labels = [idx if isinstance(idx, dict) else item_labels[idx]
480+
for idx in indexer[info_axis]]
481+
else:
482+
info_idx = indexer[info_axis]
483+
if is_integer(info_idx):
484+
info_idx = [info_idx]
485+
labels = item_labels[info_idx]
473486

474487
# if we have a partial multiindex, then need to adjust the plane
475488
# indexer here
476489
if (len(labels) == 1 and
490+
labels[0] in self.obj.columns and
477491
isinstance(self.obj[labels[0]].axes[0], MultiIndex)):
478492
item = labels[0]
479493
obj = self.obj[item]
@@ -517,7 +531,9 @@ def _setitem_with_indexer(self, indexer, value):
517531
lplane_indexer = 0
518532

519533
def setter(item, v):
520-
s = self.obj[item]
534+
item, missing = convert_missing_indexer(item)
535+
if not missing:
536+
s = self.obj[item]
521537
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
522538

523539
# perform the equivalent of a setitem on the info axis
@@ -529,8 +545,14 @@ def setter(item, v):
529545
all(com.is_null_slice(idx) or
530546
com.is_full_slice(idx, len(self.obj))
531547
for idx in pi)):
548+
if missing:
549+
self.obj[item] = v
550+
return
532551
s = v
533552
else:
553+
if missing:
554+
self[plane_indexer[0], item] = v
555+
return
534556
# set the item, possibly having a dtype change
535557
s._consolidate_inplace()
536558
s = s.copy()
@@ -1352,6 +1374,13 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False,
13521374
inds, = obj.nonzero()
13531375
return inds
13541376
else:
1377+
# allow missing columns when setting
1378+
if is_setter and axis == self.obj._info_axis_number:
1379+
return [
1380+
self._convert_to_indexer(k, axis=axis,
1381+
is_setter=is_setter,
1382+
raise_missing=raise_missing)
1383+
for k in obj]
13551384
# When setting, missing keys are not allowed, even with .loc:
13561385
kwargs = {'raise_missing': True if is_setter else
13571386
raise_missing}

pandas/tests/frame/test_indexing.py

+52-8
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from pandas.tests.frame.common import TestData
1919
import pandas.util.testing as tm
2020
from pandas.util.testing import (
21-
assert_almost_equal, assert_frame_equal, assert_series_equal)
21+
assert_almost_equal, assert_frame_equal, assert_index_equal,
22+
assert_numpy_array_equal, assert_series_equal)
2223

2324
from pandas.tseries.offsets import BDay
2425

@@ -185,6 +186,51 @@ def test_setitem_list_of_tuples(self):
185186
expected = Series(tuples, index=self.frame.index, name='tuples')
186187
assert_series_equal(result, expected)
187188

189+
def test_setitem_list_all_missing_columns_scalar(self):
190+
df = self.frame.copy()
191+
df[['E', 'F']] = 1
192+
193+
result = df.columns
194+
expected = self.frame.columns.union(['E', 'F'])
195+
assert_index_equal(result, expected)
196+
197+
result = df[self.frame.columns]
198+
expected = self.frame
199+
assert_frame_equal(result, expected)
200+
201+
assert (df[['E', 'F']] == 1).all().all()
202+
203+
def test_setitem_list_some_missing_columns_list(self):
204+
df = self.frame.copy()
205+
df[['A', 'E']] = [1, 2]
206+
207+
result = df.columns
208+
expected = self.frame.columns.union(['E'])
209+
assert_index_equal(result, expected)
210+
211+
result = df[self.frame.columns.drop('A')]
212+
expected = self.frame.drop('A', axis=1)
213+
assert_frame_equal(result, expected)
214+
215+
assert (df['A'] == 1).all()
216+
assert (df['E'] == 2).all()
217+
218+
def test_setitem_list_some_missing_columns_dataframe(self):
219+
df = self.frame.copy()
220+
df[['A', 'E']] = self.frame[['B', 'C']]
221+
222+
result = df.columns
223+
expected = self.frame.columns.union(['E'])
224+
assert_index_equal(result, expected)
225+
226+
result = df[self.frame.columns.drop('A')]
227+
expected = self.frame.drop('A', axis=1)
228+
assert_frame_equal(result, expected)
229+
230+
result = df[['A', 'E']]
231+
expected = self.frame[['B', 'C']]
232+
assert_numpy_array_equal(result.to_numpy(), expected.to_numpy())
233+
188234
def test_setitem_mulit_index(self):
189235
# GH7655, test that assigning to a sub-frame of a frame
190236
# with multi-index columns aligns both rows and columns
@@ -454,9 +500,6 @@ def test_setitem(self):
454500
self.frame['col6'] = series
455501
tm.assert_series_equal(series, self.frame['col6'], check_names=False)
456502

457-
with pytest.raises(KeyError):
458-
self.frame[np.random.randn(len(self.frame) + 1)] = 1
459-
460503
# set ndarray
461504
arr = np.random.randn(len(self.frame))
462505
self.frame['col9'] = arr
@@ -1093,10 +1136,11 @@ def test_fancy_index_int_labels_exceptions(self):
10931136
r" dtype='object'\)\] are in the \[index\]")
10941137
with pytest.raises(KeyError, match=msg):
10951138
self.frame.ix[['foo', 'bar', 'baz']] = 1
1096-
msg = (r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1097-
r" \[columns\]")
1098-
with pytest.raises(KeyError, match=msg):
1099-
self.frame.ix[:, ['E']] = 1
1139+
# partial setting now allows this GH13658
1140+
# msg = (r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1141+
# r" \[columns\]")
1142+
# with pytest.raises(KeyError, match=msg):
1143+
# self.frame.ix[:, ['E']] = 1
11001144

11011145
# partial setting now allows this GH2578
11021146
# pytest.raises(KeyError, self.frame.ix.__setitem__,

0 commit comments

Comments
 (0)