From 67be0dafd862fc2f134717359bd0ec74769f5324 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 19 Aug 2022 19:26:20 +0200 Subject: [PATCH] Backport PR #48057: REGR: fix regression in scalar setitem with setting a length-1 array-like --- doc/source/whatsnew/v1.4.4.rst | 1 + pandas/core/indexing.py | 6 +- pandas/tests/indexing/test_indexing.py | 93 ++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index 96e4ad4321c60..92870c93cdbbb 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -18,6 +18,7 @@ Fixed regressions - Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`) - Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`) - Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`) +- Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`) - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`) - Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`) - Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 60de68052639c..9659e5b157d9a 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1749,8 +1749,10 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): # We get here in one case via .loc with a all-False mask pass - elif self._is_scalar_access(indexer): - # We are setting nested data + elif self._is_scalar_access(indexer) and is_object_dtype( + self.obj.dtypes[ilocs[0]] + ): + # We are setting nested data, only possible for object dtype data self._setitem_single_column(indexer[1], value, pi) elif len(ilocs) == len(value): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 36176bb8194d4..bf3ad4a832cbc 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,5 +1,6 @@ """ test fancy indexing & misc """ +import array from datetime import datetime import re import weakref @@ -985,3 +986,95 @@ def test_extension_array_cross_section_converts(): result = df.iloc[0] tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])] +) +def test_scalar_setitem_with_nested_value(value): + # For numeric data, we try to unpack and thus raise for mismatching length + df = DataFrame({"A": [1, 2, 3]}) + msg = "|".join( + [ + "Must have equal len keys and value", + "setting an array element with a sequence", + ] + ) + with pytest.raises(ValueError, match=msg): + df.loc[0, "B"] = value + + # TODO For object dtype this happens as well, but should we rather preserve + # the nested data and set as such? + df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)}) + with pytest.raises(ValueError, match="Must have equal len keys and value"): + df.loc[0, "B"] = value + # if isinstance(value, np.ndarray): + # assert (df.loc[0, "B"] == value).all() + # else: + # assert df.loc[0, "B"] == value + + +@pytest.mark.parametrize( + "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])] +) +def test_scalar_setitem_series_with_nested_value(value, indexer_sli): + # For numeric data, we try to unpack and thus raise for mismatching length + ser = Series([1, 2, 3]) + with pytest.raises(ValueError, match="setting an array element with a sequence"): + indexer_sli(ser)[0] = value + + # but for object dtype we preserve the nested data and set as such + ser = Series([1, "a", "b"], dtype=object) + indexer_sli(ser)[0] = value + if isinstance(value, np.ndarray): + assert (ser.loc[0] == value).all() + else: + assert ser.loc[0] == value + + +@pytest.mark.parametrize( + "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])] +) +def test_scalar_setitem_with_nested_value_length1(value): + # https://github.com/pandas-dev/pandas/issues/46268 + + # For numeric data, assigning length-1 array to scalar position gets unpacked + df = DataFrame({"A": [1, 2, 3]}) + df.loc[0, "B"] = value + expected = DataFrame({"A": [1, 2, 3], "B": [0.0, np.nan, np.nan]}) + tm.assert_frame_equal(df, expected) + + # but for object dtype we preserve the nested data + df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)}) + df.loc[0, "B"] = value + if isinstance(value, np.ndarray): + assert (df.loc[0, "B"] == value).all() + else: + assert df.loc[0, "B"] == value + + +@pytest.mark.parametrize( + "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])] +) +def test_scalar_setitem_series_with_nested_value_length1(value, indexer_sli): + # For numeric data, assigning length-1 array to scalar position gets unpacked + # TODO this only happens in case of ndarray, should we make this consistent + # for all list-likes? (as happens for DataFrame.(i)loc, see test above) + ser = Series([1.0, 2.0, 3.0]) + if isinstance(value, np.ndarray): + indexer_sli(ser)[0] = value + expected = Series([0.0, 2.0, 3.0]) + tm.assert_series_equal(ser, expected) + else: + with pytest.raises( + ValueError, match="setting an array element with a sequence" + ): + indexer_sli(ser)[0] = value + + # but for object dtype we preserve the nested data + ser = Series([1, "a", "b"], dtype=object) + indexer_sli(ser)[0] = value + if isinstance(value, np.ndarray): + assert (ser.loc[0] == value).all() + else: + assert ser.loc[0] == value