From 67c4e635ed5ad0bb809995eda04876e006b2bbb2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Aug 2022 15:09:37 +0200 Subject: [PATCH] REGR: fix regression in scalar setitem with setting a length-1 array-like --- doc/source/whatsnew/v1.4.4.rst | 1 + pandas/core/indexing.py | 6 +- pandas/tests/indexing/test_indexing.py | 93 ++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index 57b8fdee5888a..32b4d1f8a8012 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -18,6 +18,7 @@ Fixed regressions - Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`) - Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`) - Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`) +- Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`) - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`) - diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b143e1e50aa6c..d7f7941f017de 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1854,8 +1854,10 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): # We get here in one case via .loc with a all-False mask pass - elif self._is_scalar_access(indexer): - # We are setting nested data + elif self._is_scalar_access(indexer) and is_object_dtype( + self.obj.dtypes[ilocs[0]] + ): + # We are setting nested data, only possible for object dtype data self._setitem_single_column(indexer[1], value, pi) elif len(ilocs) == len(value): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 9d10e487e0cc2..069e5a62895af 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,5 +1,6 @@ """ test fancy indexing & misc """ +import array from datetime import datetime import re import weakref @@ -1019,3 +1020,95 @@ def test_ser_list_indexer_exceeds_dimensions(indexer_li): res = indexer_li(ser)[[0, 0]] exp = Series([10, 10], index=Index([0, 0])) tm.assert_series_equal(res, exp) + + +@pytest.mark.parametrize( + "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])] +) +def test_scalar_setitem_with_nested_value(value): + # For numeric data, we try to unpack and thus raise for mismatching length + df = DataFrame({"A": [1, 2, 3]}) + msg = "|".join( + [ + "Must have equal len keys and value", + "setting an array element with a sequence", + ] + ) + with pytest.raises(ValueError, match=msg): + df.loc[0, "B"] = value + + # TODO For object dtype this happens as well, but should we rather preserve + # the nested data and set as such? + df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)}) + with pytest.raises(ValueError, match="Must have equal len keys and value"): + df.loc[0, "B"] = value + # if isinstance(value, np.ndarray): + # assert (df.loc[0, "B"] == value).all() + # else: + # assert df.loc[0, "B"] == value + + +@pytest.mark.parametrize( + "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])] +) +def test_scalar_setitem_series_with_nested_value(value, indexer_sli): + # For numeric data, we try to unpack and thus raise for mismatching length + ser = Series([1, 2, 3]) + with pytest.raises(ValueError, match="setting an array element with a sequence"): + indexer_sli(ser)[0] = value + + # but for object dtype we preserve the nested data and set as such + ser = Series([1, "a", "b"], dtype=object) + indexer_sli(ser)[0] = value + if isinstance(value, np.ndarray): + assert (ser.loc[0] == value).all() + else: + assert ser.loc[0] == value + + +@pytest.mark.parametrize( + "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])] +) +def test_scalar_setitem_with_nested_value_length1(value): + # https://github.com/pandas-dev/pandas/issues/46268 + + # For numeric data, assigning length-1 array to scalar position gets unpacked + df = DataFrame({"A": [1, 2, 3]}) + df.loc[0, "B"] = value + expected = DataFrame({"A": [1, 2, 3], "B": [0.0, np.nan, np.nan]}) + tm.assert_frame_equal(df, expected) + + # but for object dtype we preserve the nested data + df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)}) + df.loc[0, "B"] = value + if isinstance(value, np.ndarray): + assert (df.loc[0, "B"] == value).all() + else: + assert df.loc[0, "B"] == value + + +@pytest.mark.parametrize( + "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])] +) +def test_scalar_setitem_series_with_nested_value_length1(value, indexer_sli): + # For numeric data, assigning length-1 array to scalar position gets unpacked + # TODO this only happens in case of ndarray, should we make this consistent + # for all list-likes? (as happens for DataFrame.(i)loc, see test above) + ser = Series([1.0, 2.0, 3.0]) + if isinstance(value, np.ndarray): + indexer_sli(ser)[0] = value + expected = Series([0.0, 2.0, 3.0]) + tm.assert_series_equal(ser, expected) + else: + with pytest.raises( + ValueError, match="setting an array element with a sequence" + ): + indexer_sli(ser)[0] = value + + # but for object dtype we preserve the nested data + ser = Series([1, "a", "b"], dtype=object) + indexer_sli(ser)[0] = value + if isinstance(value, np.ndarray): + assert (ser.loc[0] == value).all() + else: + assert ser.loc[0] == value