Skip to content

Commit c9ff25a

Browse files
author
Si Wei How
committed
Fix assignment to multiple columns when some columns do not exist
1 parent 0a516c1 commit c9ff25a

File tree

4 files changed

+88
-14
lines changed

4 files changed

+88
-14
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ Indexing
425425
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
426426
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`)
427427
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
428+
- Bug in assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist (:issue:`13658`)
428429

429430

430431
Missing

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3373,7 +3373,8 @@ def _setitem_array(self, key, value):
33733373
for k1, k2 in zip(key, value.columns):
33743374
self[k1] = value[k2]
33753375
else:
3376-
indexer = self.loc._convert_to_indexer(key, axis=1)
3376+
indexer = self.loc._convert_to_indexer(key, axis=1,
3377+
is_setter=True)
33773378
self._check_setitem_copy()
33783379
self.loc._setitem_with_indexer((slice(None), indexer), value)
33793380

pandas/core/indexing.py

+33-5
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,14 @@ def _setitem_with_indexer(self, indexer, value):
304304
# maybe partial set
305305
take_split_path = self.obj._is_mixed_type
306306

307+
has_missing_columns = (
308+
isinstance(indexer, tuple) and
309+
is_list_like_indexer(indexer[info_axis]) and
310+
any(isinstance(k, dict) for k in indexer[1]))
311+
312+
if has_missing_columns:
313+
take_split_path = True
314+
307315
# if there is only one block/type, still have to take split path
308316
# unless the block is one-dimensional or it can hold the value
309317
if not take_split_path and self.obj._data.blocks:
@@ -466,14 +474,19 @@ def _setitem_with_indexer(self, indexer, value):
466474
if isinstance(value, ABCSeries):
467475
value = self._align_series(indexer, value)
468476

469-
info_idx = indexer[info_axis]
470-
if is_integer(info_idx):
471-
info_idx = [info_idx]
472-
labels = item_labels[info_idx]
477+
if has_missing_columns:
478+
labels = [item_labels[idx] if is_integer(idx) else idx['key']
479+
for idx in indexer[info_axis]]
480+
else:
481+
info_idx = indexer[info_axis]
482+
if is_integer(info_idx):
483+
info_idx = [info_idx]
484+
labels = item_labels[info_idx]
473485

474486
# if we have a partial multiindex, then need to adjust the plane
475487
# indexer here
476488
if (len(labels) == 1 and
489+
labels[0] in self.obj.columns and
477490
isinstance(self.obj[labels[0]].axes[0], MultiIndex)):
478491
item = labels[0]
479492
obj = self.obj[item]
@@ -517,7 +530,9 @@ def _setitem_with_indexer(self, indexer, value):
517530
lplane_indexer = 0
518531

519532
def setter(item, v):
520-
s = self.obj[item]
533+
is_missing_column = item not in self.obj.columns
534+
if not is_missing_column:
535+
s = self.obj[item]
521536
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
522537

523538
# perform the equivalent of a setitem on the info axis
@@ -529,8 +544,14 @@ def setter(item, v):
529544
all(com.is_null_slice(idx) or
530545
com.is_full_slice(idx, len(self.obj))
531546
for idx in pi)):
547+
if is_missing_column:
548+
self.obj[item] = v
549+
return
532550
s = v
533551
else:
552+
if is_missing_column:
553+
self[plane_indexer[0], item] = v
554+
return
534555
# set the item, possibly having a dtype change
535556
s._consolidate_inplace()
536557
s = s.copy()
@@ -1352,6 +1373,13 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False,
13521373
inds, = obj.nonzero()
13531374
return inds
13541375
else:
1376+
# allow missing columns when setting
1377+
if is_setter and axis == 1:
1378+
return [
1379+
self._convert_to_indexer(k, axis=axis,
1380+
is_setter=is_setter,
1381+
raise_missing=raise_missing)
1382+
for k in obj]
13551383
# When setting, missing keys are not allowed, even with .loc:
13561384
kwargs = {'raise_missing': True if is_setter else
13571385
raise_missing}

pandas/tests/frame/test_indexing.py

+52-8
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from pandas.tests.frame.common import TestData
1919
import pandas.util.testing as tm
2020
from pandas.util.testing import (
21-
assert_almost_equal, assert_frame_equal, assert_series_equal)
21+
assert_almost_equal, assert_frame_equal, assert_index_equal,
22+
assert_numpy_array_equal, assert_series_equal)
2223

2324
from pandas.tseries.offsets import BDay
2425

@@ -185,6 +186,51 @@ def test_setitem_list_of_tuples(self):
185186
expected = Series(tuples, index=self.frame.index, name='tuples')
186187
assert_series_equal(result, expected)
187188

189+
def test_setitem_list_all_missing_columns_scalar(self):
190+
df = self.frame.copy()
191+
df[['E', 'F']] = 1
192+
193+
result = df.columns
194+
expected = self.frame.columns.union(['E', 'F'])
195+
assert_index_equal(result, expected)
196+
197+
result = df[self.frame.columns]
198+
expected = self.frame
199+
assert_frame_equal(result, expected)
200+
201+
assert (df[['E', 'F']] == 1).all().all()
202+
203+
def test_setitem_list_some_missing_columns_list(self):
204+
df = self.frame.copy()
205+
df[['A', 'E']] = [1, 2]
206+
207+
result = df.columns
208+
expected = self.frame.columns.union(['E'])
209+
assert_index_equal(result, expected)
210+
211+
result = df[self.frame.columns.drop('A')]
212+
expected = self.frame.drop('A', axis=1)
213+
assert_frame_equal(result, expected)
214+
215+
assert (df['A'] == 1).all()
216+
assert (df['E'] == 2).all()
217+
218+
def test_setitem_list_some_missing_columns_dataframe(self):
219+
df = self.frame.copy()
220+
df[['A', 'E']] = self.frame[['B', 'C']]
221+
222+
result = df.columns
223+
expected = self.frame.columns.union(['E'])
224+
assert_index_equal(result, expected)
225+
226+
result = df[self.frame.columns.drop('A')]
227+
expected = self.frame.drop('A', axis=1)
228+
assert_frame_equal(result, expected)
229+
230+
result = df[['A', 'E']]
231+
expected = self.frame[['B', 'C']]
232+
assert_numpy_array_equal(result.to_numpy(), expected.to_numpy())
233+
188234
def test_setitem_mulit_index(self):
189235
# GH7655, test that assigning to a sub-frame of a frame
190236
# with multi-index columns aligns both rows and columns
@@ -454,9 +500,6 @@ def test_setitem(self):
454500
self.frame['col6'] = series
455501
tm.assert_series_equal(series, self.frame['col6'], check_names=False)
456502

457-
with pytest.raises(KeyError):
458-
self.frame[np.random.randn(len(self.frame) + 1)] = 1
459-
460503
# set ndarray
461504
arr = np.random.randn(len(self.frame))
462505
self.frame['col9'] = arr
@@ -1093,10 +1136,11 @@ def test_fancy_index_int_labels_exceptions(self):
10931136
r" dtype='object'\)\] are in the \[index\]")
10941137
with pytest.raises(KeyError, match=msg):
10951138
self.frame.ix[['foo', 'bar', 'baz']] = 1
1096-
msg = (r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1097-
r" \[columns\]")
1098-
with pytest.raises(KeyError, match=msg):
1099-
self.frame.ix[:, ['E']] = 1
1139+
# partial setting now allows this GH13658
1140+
# msg = (r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1141+
# r" \[columns\]")
1142+
# with pytest.raises(KeyError, match=msg):
1143+
# self.frame.ix[:, ['E']] = 1
11001144

11011145
# partial setting now allows this GH2578
11021146
# pytest.raises(KeyError, self.frame.ix.__setitem__,

0 commit comments

Comments
 (0)