Manual Backport PR pandas-dev#48057 on branch 1.4.x (REGR: fix regression in scalar setitem with setting a length-1 array-like) (pandas-dev#48161)

mroeschke · jorisvandenbossche · web-flow · commit 3886bf53605f · 2022-08-19T21:23:15.000+02:00
Backport PR pandas-dev#48057: REGR: fix regression in scalar setitem with setting a length-1 array-like Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst
@@ -18,6 +18,7 @@ Fixed regressions
 - Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`)
 - Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`)
 - Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`)
+- Fixed regression in :meth:`DataFrame.loc` when setting a length-1 array-like value to a single value in the DataFrame (:issue:`46268`)
 - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
 - Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`)
 - Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`)
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1749,8 +1749,10 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
                 # We get here in one case via .loc with a all-False mask
                 pass
 
-            elif self._is_scalar_access(indexer):
-                # We are setting nested data
+            elif self._is_scalar_access(indexer) and is_object_dtype(
+                self.obj.dtypes[ilocs[0]]
+            ):
+                # We are setting nested data, only possible for object dtype data
                 self._setitem_single_column(indexer[1], value, pi)
 
             elif len(ilocs) == len(value):
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -1,5 +1,6 @@
 """ test fancy indexing & misc """
 
+import array
 from datetime import datetime
 import re
 import weakref
@@ -985,3 +986,95 @@ def test_extension_array_cross_section_converts():
 
     result = df.iloc[0]
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])]
+)
+def test_scalar_setitem_with_nested_value(value):
+    # For numeric data, we try to unpack and thus raise for mismatching length
+    df = DataFrame({"A": [1, 2, 3]})
+    msg = "|".join(
+        [
+            "Must have equal len keys and value",
+            "setting an array element with a sequence",
+        ]
+    )
+    with pytest.raises(ValueError, match=msg):
+        df.loc[0, "B"] = value
+
+    # TODO: For object dtype this raises as well, but should we instead preserve
+    # the nested data and set it as such?
+    df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)})
+    with pytest.raises(ValueError, match="Must have equal len keys and value"):
+        df.loc[0, "B"] = value
+    # if isinstance(value, np.ndarray):
+    #     assert (df.loc[0, "B"] == value).all()
+    # else:
+    #     assert df.loc[0, "B"] == value
+
+
+@pytest.mark.parametrize(
+    "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])]
+)
+def test_scalar_setitem_series_with_nested_value(value, indexer_sli):
+    # For numeric data, we try to unpack and thus raise for mismatching length
+    ser = Series([1, 2, 3])
+    with pytest.raises(ValueError, match="setting an array element with a sequence"):
+        indexer_sli(ser)[0] = value
+
+    # but for object dtype we preserve the nested data and set as such
+    ser = Series([1, "a", "b"], dtype=object)
+    indexer_sli(ser)[0] = value
+    if isinstance(value, np.ndarray):
+        assert (ser.loc[0] == value).all()
+    else:
+        assert ser.loc[0] == value
+
+
+@pytest.mark.parametrize(
+    "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])]
+)
+def test_scalar_setitem_with_nested_value_length1(value):
+    # https://github.com/pandas-dev/pandas/issues/46268
+
+    # For numeric data, assigning length-1 array to scalar position gets unpacked
+    df = DataFrame({"A": [1, 2, 3]})
+    df.loc[0, "B"] = value
+    expected = DataFrame({"A": [1, 2, 3], "B": [0.0, np.nan, np.nan]})
+    tm.assert_frame_equal(df, expected)
+
+    # but for object dtype we preserve the nested data
+    df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)})
+    df.loc[0, "B"] = value
+    if isinstance(value, np.ndarray):
+        assert (df.loc[0, "B"] == value).all()
+    else:
+        assert df.loc[0, "B"] == value
+
+
+@pytest.mark.parametrize(
+    "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])]
+)
+def test_scalar_setitem_series_with_nested_value_length1(value, indexer_sli):
+    # For numeric data, assigning length-1 array to scalar position gets unpacked
+    # TODO: this only happens for ndarray; should we make this consistent
+    # for all list-likes? (as happens for DataFrame.(i)loc, see test above)
+    ser = Series([1.0, 2.0, 3.0])
+    if isinstance(value, np.ndarray):
+        indexer_sli(ser)[0] = value
+        expected = Series([0.0, 2.0, 3.0])
+        tm.assert_series_equal(ser, expected)
+    else:
+        with pytest.raises(
+            ValueError, match="setting an array element with a sequence"
+        ):
+            indexer_sli(ser)[0] = value
+
+    # but for object dtype we preserve the nested data
+    ser = Series([1, "a", "b"], dtype=object)
+    indexer_sli(ser)[0] = value
+    if isinstance(value, np.ndarray):
+        assert (ser.loc[0] == value).all()
+    else:
+        assert ser.loc[0] == value