Skip to content

Commit 810a4e5

Browse files
authored
BUG: assignment to multiple columns when some columns do not exist (#29334)
1 parent 1a5b11d commit 810a4e5

File tree

5 files changed

+187
-7
lines changed

5 files changed

+187
-7
lines changed

doc/source/whatsnew/v1.1.0.rst

+31
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,37 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss
168168
169169
.. ---------------------------------------------------------------------------
170170
171+
.. _whatsnew_110.api_breaking.assignment_to_multiple_columns:
172+
173+
Assignment to multiple columns of a DataFrame when some columns do not exist
174+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
175+
176+
Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns will be constructed with the right values. (:issue:`13658`)
177+
178+
.. ipython:: python
179+
180+
df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
181+
df
182+
183+
*Previous behavior*:
184+
185+
.. code-block:: ipython
186+
187+
In [3]: df[['a', 'c']] = 1
188+
In [4]: df
189+
Out[4]:
190+
a b
191+
0 1 1
192+
1 1 1
193+
2 1 1
194+
195+
*New behavior*:
196+
197+
.. ipython:: python
198+
199+
df[['a', 'c']] = 1
200+
df
201+
171202
.. _whatsnew_110.deprecations:
172203

173204
Deprecations

pandas/core/frame.py

+1
Original file line numberDiff line numberDiff line change
@@ -2687,6 +2687,7 @@ def _setitem_array(self, key, value):
26872687
for k1, k2 in zip(key, value.columns):
26882688
self[k1] = value[k2]
26892689
else:
2690+
self.loc._ensure_listlike_indexer(key, axis=1)
26902691
indexer = self.loc._get_listlike_indexer(
26912692
key, axis=1, raise_missing=False
26922693
)[1]

pandas/core/indexing.py

+40
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas.util._decorators import Appender
99

1010
from pandas.core.dtypes.common import (
11+
is_hashable,
1112
is_integer,
1213
is_iterator,
1314
is_list_like,
@@ -581,6 +582,9 @@ def _get_setitem_indexer(self, key):
581582
"""
582583
Convert a potentially-label-based key into a positional indexer.
583584
"""
585+
if self.name == "loc":
586+
self._ensure_listlike_indexer(key)
587+
584588
if self.axis is not None:
585589
return self._convert_tuple(key, is_setter=True)
586590

@@ -611,6 +615,42 @@ def _get_setitem_indexer(self, key):
611615
raise
612616
raise IndexingError(key) from e
613617

618+
def _ensure_listlike_indexer(self, key, axis=None):
    """
    Ensure that a list-like of column labels are all present by adding them if
    they do not already exist.

    Missing columns are created in place on ``self.obj`` and filled with
    ``np.nan``; columns that already exist are left untouched.  The method is
    a no-op unless ``self.ndim`` is 2, the columns of ``self.obj`` are not a
    MultiIndex, and the column part of ``key`` is a non-boolean list-like
    whose elements are all hashable.

    Parameters
    ----------
    key : _LocIndexer key or list-like of column labels
        Target labels.  When a tuple with more than one element is given, its
        second element is used as the column part.
    axis : key axis if known
        Only the column axis (``1``) triggers any work.  It is overridden
        with the column axis when ``key`` is a multi-element tuple.
    """
    column_axis = 1

    # column only exists in 2-dimensional DataFrame
    if self.ndim != 2:
        return

    if isinstance(key, tuple) and len(key) > 1:
        # key may be a tuple if key is a _LocIndexer key
        # in that case, set key to the column part of key.
        # A one-element tuple carries no column part, so it is left alone
        # instead of raising IndexError on key[column_axis].
        key = key[column_axis]
        axis = column_axis

    if (
        axis == column_axis
        and not isinstance(self.obj.columns, ABCMultiIndex)
        and is_list_like_indexer(key)
        and not com.is_bool_indexer(key)
        and all(is_hashable(k) for k in key)
    ):
        for k in key:
            # Test membership on the column index directly rather than via
            # self.obj[k]: that avoids materialising the column and cannot be
            # satisfied by any non-column lookup path of DataFrame.__getitem__.
            # The columns are re-read on every pass because they grow as
            # missing labels are added.
            if k not in self.obj.columns:
                self.obj[k] = np.nan
653+
614654
def __setitem__(self, key, value):
615655
if isinstance(key, tuple):
616656
key = tuple(com.apply_if_callable(x, self.obj) for x in key)

pandas/tests/frame/indexing/test_indexing.py

+57-7
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,63 @@ def test_setitem_list_of_tuples(self, float_frame):
215215
expected = Series(tuples, index=float_frame.index, name="tuples")
216216
tm.assert_series_equal(result, expected)
217217

218+
@pytest.mark.parametrize(
    "columns,box,expected",
    [
        # scalar assigned to existing and missing columns alike
        (
            ["A", "B", "C", "D"],
            7,
            pd.DataFrame(
                [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]],
                columns=["A", "B", "C", "D"],
            ),
        ),
        # flat list, one value per (missing) column
        (
            ["C", "D"],
            [7, 8],
            pd.DataFrame(
                [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]],
                columns=["A", "B", "C", "D"],
            ),
        ),
        # 1-D ndarray, one value per column
        (
            ["A", "B", "C"],
            np.array([7, 8, 9], dtype=np.int64),
            pd.DataFrame(
                [[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]
            ),
        ),
        # nested list, one row of values per frame row
        (
            ["B", "C", "D"],
            [[7, 8, 9], [10, 11, 12], [13, 14, 15]],
            pd.DataFrame(
                [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
                columns=["A", "B", "C", "D"],
            ),
        ),
        # 2-D ndarray with the target columns listed out of frame order
        (
            ["C", "A", "D"],
            np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64),
            pd.DataFrame(
                [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]],
                columns=["A", "B", "C", "D"],
            ),
        ),
        # DataFrame value
        (
            ["A", "C"],
            pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
            pd.DataFrame(
                [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
            ),
        ),
    ],
)
def test_setitem_list_missing_columns(self, columns, box, expected):
    """Assign ``box`` to a list of columns, some of which do not exist yet."""
    # GH 29334
    frame = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
    frame[columns] = box
    tm.assert_frame_equal(frame, expected)
274+
218275
def test_setitem_multi_index(self):
219276
# GH7655, test that assigning to a sub-frame of a frame
220277
# with multi-index columns aligns both rows and columns
@@ -459,13 +516,6 @@ def test_setitem(self, float_frame):
459516
float_frame["col6"] = series
460517
tm.assert_series_equal(series, float_frame["col6"], check_names=False)
461518

462-
msg = (
463-
r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the "
464-
r"\[columns\]\""
465-
)
466-
with pytest.raises(KeyError, match=msg):
467-
float_frame[np.random.randn(len(float_frame) + 1)] = 1
468-
469519
# set ndarray
470520
arr = np.random.randn(len(float_frame))
471521
float_frame["col9"] = arr

pandas/tests/indexing/test_loc.py

+58
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,64 @@ def test_loc_setitem_with_scalar_index(self, indexer, value):
631631

632632
assert is_scalar(result) and result == "Z"
633633

634+
@pytest.mark.parametrize(
    "index,box,expected",
    [
        # scalar, list of row labels
        (
            ([0, 2], ["A", "B", "C", "D"]),
            7,
            pd.DataFrame(
                [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]],
                columns=["A", "B", "C", "D"],
            ),
        ),
        # flat list, single row label
        (
            (1, ["C", "D"]),
            [7, 8],
            pd.DataFrame(
                [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
                columns=["A", "B", "C", "D"],
            ),
        ),
        # 1-D ndarray, single row label
        (
            (1, ["A", "B", "C"]),
            np.array([7, 8, 9], dtype=np.int64),
            pd.DataFrame(
                [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]],
                columns=["A", "B", "C"],
            ),
        ),
        # nested list, row slice
        (
            (slice(1, 3, None), ["B", "C", "D"]),
            [[7, 8, 9], [10, 11, 12]],
            pd.DataFrame(
                [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
                columns=["A", "B", "C", "D"],
            ),
        ),
        # 2-D ndarray, row slice, target columns listed out of frame order
        (
            (slice(1, 3, None), ["C", "A", "D"]),
            np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64),
            pd.DataFrame(
                [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]],
                columns=["A", "B", "C", "D"],
            ),
        ),
        # DataFrame value, full row slice
        (
            (slice(None, None, None), ["A", "C"]),
            pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
            pd.DataFrame(
                [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
            ),
        ),
    ],
)
def test_loc_setitem_missing_columns(self, index, box, expected):
    """Assign ``box`` through ``.loc`` with some target columns missing."""
    # GH 29334
    frame = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
    frame.loc[index] = box
    tm.assert_frame_equal(frame, expected)
691+
634692
def test_loc_coercion(self):
635693

636694
# 12411

0 commit comments

Comments
 (0)