Skip to content

Commit 41a560e

Browse files
How Si Weihowsiwei
How Si Wei
authored and committed
Fix assignment to multiple columns when some columns do not exist
1 parent 54e5803 commit 41a560e

File tree

5 files changed

+133
-18
lines changed

5 files changed

+133
-18
lines changed

doc/source/whatsnew/v1.0.0.rst

+32
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,38 @@ Backwards incompatible API changes
4242
- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`).
4343
-
4444

45+
46+
.. _whatsnew_1000.api_breaking.multicolumn_assignment:
47+
48+
Assignment to multiple columns of a DataFrame when some columns do not exist
49+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
50+
51+
Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns are constructed with the right values. (:issue:`13658`)
52+
53+
.. ipython:: python
54+
df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
55+
df
56+
57+
*Previous behavior*:
58+
59+
.. code-block:: ipython
60+
61+
In [3]: df[['a', 'c']] = 1
62+
In [4]: df
63+
Out[4]:
64+
a b
65+
0 1 1
66+
1 1 1
67+
2 1 1
68+
69+
*New behavior*:
70+
71+
.. ipython:: python
72+
73+
df[['a', 'c']] = 1
74+
df
75+
76+
4577
Other API changes
4678
^^^^^^^^^^^^^^^^^
4779

pandas/core/frame.py

+6
Original file line numberDiff line numberDiff line change
@@ -3006,6 +3006,12 @@ def _setitem_array(self, key, value):
30063006
for k1, k2 in zip(key, value.columns):
30073007
self[k1] = value[k2]
30083008
else:
3009+
if all(is_hashable(k) for k in key):
3010+
for k in key:
3011+
try:
3012+
self[k]
3013+
except KeyError:
3014+
self[k] = np.nan
30093015
indexer = self.loc._get_listlike_indexer(
30103016
key, axis=1, raise_missing=False
30113017
)[1]

pandas/core/indexing.py

+14
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.core.dtypes.common import (
1313
ensure_platform_int,
1414
is_float,
15+
is_hashable,
1516
is_integer,
1617
is_integer_dtype,
1718
is_iterator,
@@ -197,6 +198,19 @@ def _get_setitem_indexer(self, key):
197198
def __setitem__(self, key, value):
198199
if isinstance(key, tuple):
199200
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
201+
if (
202+
self.name == "loc"
203+
and len(key) > 1
204+
and is_list_like_indexer(key[1])
205+
and not isinstance(key[1], tuple)
206+
and not com.is_bool_indexer(key[1])
207+
and all(is_hashable(k) for k in key[1])
208+
):
209+
for k in key[1]:
210+
try:
211+
self.obj[k]
212+
except KeyError:
213+
self.obj[k] = np.nan
200214
else:
201215
key = com.apply_if_callable(key, self.obj)
202216
indexer = self._get_setitem_indexer(key)

pandas/tests/frame/test_indexing.py

+41-18
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,47 @@ def test_setitem_list_of_tuples(self, float_frame):
208208
expected = Series(tuples, index=float_frame.index, name="tuples")
209209
assert_series_equal(result, expected)
210210

211+
def test_setitem_list_all_missing_columns_scalar(self, float_frame):
212+
# GH 26534
213+
result = float_frame.copy()
214+
result[["E", "F"]] = 1
215+
expected = float_frame.copy()
216+
# force the dtypes to be float as currently multi-column assignment does not
217+
# change column dtype from float to int even when it's being assigned an int
218+
expected["E"] = 1.0
219+
expected["F"] = 1.0
220+
assert_frame_equal(result, expected)
221+
222+
def test_setitem_list_some_missing_columns_list(self, float_frame):
223+
# GH 26534
224+
result = float_frame.copy()
225+
result[["A", "E"]] = [1, 2]
226+
expected = float_frame.copy()
227+
# force the dtypes to be float as currently multi-column assignment does not
228+
# change column dtype from float to int even when it's being assigned an int
229+
expected["A"] = 1.0
230+
expected["E"] = 2.0
231+
assert_frame_equal(result, expected)
232+
233+
def test_setitem_list_some_missing_columns_dataframe(self, float_frame):
234+
# GH 26534
235+
result = float_frame.copy()
236+
result[["A", "E"]] = float_frame[["B", "C"]]
237+
expected = float_frame.copy()
238+
expected["A"] = float_frame["B"]
239+
expected["E"] = float_frame["C"]
240+
assert_frame_equal(result, expected)
241+
242+
def test_setitem_list_some_missing_columns_2dlist(self):
243+
# GH 26534
244+
result = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
245+
result[["B", "C", "D"]] = [[7, 8, 9], [10, 11, 12], [13, 14, 15]]
246+
expected = pd.DataFrame(
247+
[[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
248+
columns=["A", "B", "C", "D"],
249+
)
250+
tm.assert_frame_equal(result, expected)
251+
211252
def test_setitem_mulit_index(self):
212253
# GH7655, test that assigning to a sub-frame of a frame
213254
# with multi-index columns aligns both rows and columns
@@ -501,13 +542,6 @@ def test_setitem(self, float_frame):
501542
float_frame["col6"] = series
502543
tm.assert_series_equal(series, float_frame["col6"], check_names=False)
503544

504-
msg = (
505-
r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the"
506-
r" \[columns\]\""
507-
)
508-
with pytest.raises(KeyError, match=msg):
509-
float_frame[np.random.randn(len(float_frame) + 1)] = 1
510-
511545
# set ndarray
512546
arr = np.random.randn(len(float_frame))
513547
float_frame["col9"] = arr
@@ -1143,17 +1177,6 @@ def test_fancy_index_int_labels_exceptions(self, float_frame):
11431177
)
11441178
with pytest.raises(KeyError, match=msg):
11451179
float_frame.ix[["foo", "bar", "baz"]] = 1
1146-
msg = (
1147-
r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1148-
r" \[columns\]"
1149-
)
1150-
with pytest.raises(KeyError, match=msg):
1151-
float_frame.ix[:, ["E"]] = 1
1152-
1153-
# FIXME: don't leave commented-out
1154-
# partial setting now allows this GH2578
1155-
# pytest.raises(KeyError, float_frame.ix.__setitem__,
1156-
# (slice(None, None), 'E'), 1)
11571180

11581181
def test_setitem_fancy_mixed_2d(self, float_string_frame):
11591182

pandas/tests/indexing/test_loc.py

+40
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,46 @@ def test_loc_setitem_with_scalar_index(self, indexer, value):
808808

809809
assert is_scalar(result) and result == "Z"
810810

811+
def test_loc_setitem_missing_columns_scalar_index_list_value(self):
812+
# GH 26534
813+
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
814+
df.loc[1, ["C", "D"]] = [7, 8]
815+
expected = pd.DataFrame(
816+
[[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
817+
columns=["A", "B", "C", "D"],
818+
)
819+
tm.assert_frame_equal(df, expected)
820+
821+
def test_loc_setitem_missing_columns_full_index_dataframe_value(self):
822+
# GH 26534
823+
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
824+
df2 = pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"])
825+
df.loc[:, ["A", "C"]] = df2
826+
expected = pd.DataFrame(
827+
[[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
828+
)
829+
tm.assert_frame_equal(df, expected)
830+
831+
def test_loc_setitem_missing_columns_list_index_scalar_value(self):
832+
# GH 26534
833+
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
834+
df.loc[[0, 2], ["B", "C", "D"]] = 9
835+
expected = pd.DataFrame(
836+
[[1, 9, 9, 9], [3, 4, np.nan, np.nan], [5, 9, 9, 9]],
837+
columns=["A", "B", "C", "D"],
838+
)
839+
tm.assert_frame_equal(df, expected)
840+
841+
def test_loc_setitem_missing_columns_range_index_2dlist_value(self):
842+
# GH 26534
843+
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
844+
df.loc[1:3, ["B", "C", "D"]] = [[7, 8, 9], [10, 11, 12]]
845+
expected = pd.DataFrame(
846+
[[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
847+
columns=["A", "B", "C", "D"],
848+
)
849+
tm.assert_frame_equal(df, expected)
850+
811851
def test_loc_coercion(self):
812852

813853
# 12411

0 commit comments

Comments
 (0)