Skip to content

Commit 73e0b5e

Browse files
REGR: fix regression in scalar setitem with setting a length-1 array-like (#48057)
1 parent ca43cde commit 73e0b5e

File tree

3 files changed

+98
-2
lines changed

3 files changed

+98
-2
lines changed

doc/source/whatsnew/v1.4.4.rst

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`)
1919
- Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`)
2020
- Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`)
21+
- Fixed regression in :meth:`DataFrame.loc` when setting a length-1 array-like value to a single value in the DataFrame (:issue:`46268`)
2122
- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
2223
- Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`)
2324
- Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`)

pandas/core/indexing.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1854,8 +1854,10 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
18541854
# We get here in one case via .loc with an all-False mask
18551855
pass
18561856

1857-
elif self._is_scalar_access(indexer):
1858-
# We are setting nested data
1857+
elif self._is_scalar_access(indexer) and is_object_dtype(
1858+
self.obj.dtypes[ilocs[0]]
1859+
):
1860+
# We are setting nested data, only possible for object dtype data
18591861
self._setitem_single_column(indexer[1], value, pi)
18601862

18611863
elif len(ilocs) == len(value):

pandas/tests/indexing/test_indexing.py

+93
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" test fancy indexing & misc """
22

3+
import array
34
from datetime import datetime
45
import re
56
import weakref
@@ -1019,3 +1020,95 @@ def test_ser_list_indexer_exceeds_dimensions(indexer_li):
10191020
res = indexer_li(ser)[[0, 0]]
10201021
exp = Series([10, 10], index=Index([0, 0]))
10211022
tm.assert_series_equal(res, exp)
1023+
1024+
1025+
@pytest.mark.parametrize(
1026+
"value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])]
1027+
)
1028+
def test_scalar_setitem_with_nested_value(value):
1029+
# For numeric data, we try to unpack and thus raise for mismatching length
1030+
df = DataFrame({"A": [1, 2, 3]})
1031+
msg = "|".join(
1032+
[
1033+
"Must have equal len keys and value",
1034+
"setting an array element with a sequence",
1035+
]
1036+
)
1037+
with pytest.raises(ValueError, match=msg):
1038+
df.loc[0, "B"] = value
1039+
1040+
# TODO For object dtype this happens as well, but should we rather preserve
1041+
# the nested data and set as such?
1042+
df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)})
1043+
with pytest.raises(ValueError, match="Must have equal len keys and value"):
1044+
df.loc[0, "B"] = value
1045+
# if isinstance(value, np.ndarray):
1046+
# assert (df.loc[0, "B"] == value).all()
1047+
# else:
1048+
# assert df.loc[0, "B"] == value
1049+
1050+
1051+
@pytest.mark.parametrize(
1052+
"value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])]
1053+
)
1054+
def test_scalar_setitem_series_with_nested_value(value, indexer_sli):
1055+
# For numeric data, we try to unpack and thus raise for mismatching length
1056+
ser = Series([1, 2, 3])
1057+
with pytest.raises(ValueError, match="setting an array element with a sequence"):
1058+
indexer_sli(ser)[0] = value
1059+
1060+
# but for object dtype we preserve the nested data and set as such
1061+
ser = Series([1, "a", "b"], dtype=object)
1062+
indexer_sli(ser)[0] = value
1063+
if isinstance(value, np.ndarray):
1064+
assert (ser.loc[0] == value).all()
1065+
else:
1066+
assert ser.loc[0] == value
1067+
1068+
1069+
@pytest.mark.parametrize(
1070+
"value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])]
1071+
)
1072+
def test_scalar_setitem_with_nested_value_length1(value):
1073+
# https://github.com/pandas-dev/pandas/issues/46268
1074+
1075+
# For numeric data, assigning length-1 array to scalar position gets unpacked
1076+
df = DataFrame({"A": [1, 2, 3]})
1077+
df.loc[0, "B"] = value
1078+
expected = DataFrame({"A": [1, 2, 3], "B": [0.0, np.nan, np.nan]})
1079+
tm.assert_frame_equal(df, expected)
1080+
1081+
# but for object dtype we preserve the nested data
1082+
df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)})
1083+
df.loc[0, "B"] = value
1084+
if isinstance(value, np.ndarray):
1085+
assert (df.loc[0, "B"] == value).all()
1086+
else:
1087+
assert df.loc[0, "B"] == value
1088+
1089+
1090+
@pytest.mark.parametrize(
1091+
"value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])]
1092+
)
1093+
def test_scalar_setitem_series_with_nested_value_length1(value, indexer_sli):
1094+
# For numeric data, assigning length-1 array to scalar position gets unpacked
1095+
# TODO this only happens in case of ndarray, should we make this consistent
1096+
# for all list-likes? (as happens for DataFrame.(i)loc, see test above)
1097+
ser = Series([1.0, 2.0, 3.0])
1098+
if isinstance(value, np.ndarray):
1099+
indexer_sli(ser)[0] = value
1100+
expected = Series([0.0, 2.0, 3.0])
1101+
tm.assert_series_equal(ser, expected)
1102+
else:
1103+
with pytest.raises(
1104+
ValueError, match="setting an array element with a sequence"
1105+
):
1106+
indexer_sli(ser)[0] = value
1107+
1108+
# but for object dtype we preserve the nested data
1109+
ser = Series([1, "a", "b"], dtype=object)
1110+
indexer_sli(ser)[0] = value
1111+
if isinstance(value, np.ndarray):
1112+
assert (ser.loc[0] == value).all()
1113+
else:
1114+
assert ser.loc[0] == value

0 commit comments

Comments
 (0)