Skip to content

Manual Backport PR #48057 on branch 1.4.x (REGR: fix regression in scalar setitem with setting a length-1 array-like) #48161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Fixed regressions
- Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`)
- Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`)
- Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`)
- Fixed regression in :meth:`DataFrame.loc` setting a length-1 array-like value to a single value in the DataFrame (:issue:`46268`)
- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
- Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`)
- Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`)
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1749,8 +1749,10 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
# We get here in one case via .loc with an all-False mask
pass

elif self._is_scalar_access(indexer):
# We are setting nested data
elif self._is_scalar_access(indexer) and is_object_dtype(
self.obj.dtypes[ilocs[0]]
):
# We are setting nested data, only possible for object dtype data
self._setitem_single_column(indexer[1], value, pi)

elif len(ilocs) == len(value):
Expand Down
93 changes: 93 additions & 0 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" test fancy indexing & misc """

import array
from datetime import datetime
import re
import weakref
Expand Down Expand Up @@ -985,3 +986,95 @@ def test_extension_array_cross_section_converts():

result = df.iloc[0]
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])]
)
def test_scalar_setitem_with_nested_value(value):
    """Setting a length-2 list-like at one ``DataFrame.loc`` position raises."""
    # Numeric data: the value is unpacked, so a length mismatch against the
    # single target position raises. Either message is accepted.
    numeric_frame = DataFrame({"A": [1, 2, 3]})
    accepted_messages = [
        "Must have equal len keys and value",
        "setting an array element with a sequence",
    ]
    with pytest.raises(ValueError, match="|".join(accepted_messages)):
        numeric_frame.loc[0, "B"] = value

    # TODO For object dtype this happens as well, but should we rather preserve
    # the nested data and set as such? If so, this block would stop expecting
    # an error and instead assert that ``.loc[0, "B"]`` compares equal to
    # ``value`` (reduced with ``.all()`` when ``value`` is an ndarray).
    object_column = np.array([1, "a", "b"], dtype=object)
    object_frame = DataFrame({"A": [1, 2, 3], "B": object_column})
    with pytest.raises(ValueError, match="Must have equal len keys and value"):
        object_frame.loc[0, "B"] = value


@pytest.mark.parametrize(
    "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])]
)
def test_scalar_setitem_series_with_nested_value(value, indexer_sli):
    """Length-2 list-likes raise for a numeric Series, are stored for object dtype."""
    # Numeric data: the value is unpacked, so the length mismatch raises.
    numeric = Series([1, 2, 3])
    with pytest.raises(ValueError, match="setting an array element with a sequence"):
        indexer_sli(numeric)[0] = value

    # Object dtype: the nested value is kept as-is in the single position.
    nested = Series([1, "a", "b"], dtype=object)
    indexer_sli(nested)[0] = value

    is_equal = nested.loc[0] == value
    if isinstance(value, np.ndarray):
        # Comparing against an ndarray is elementwise; reduce to one bool.
        is_equal = is_equal.all()
    assert is_equal


@pytest.mark.parametrize(
    "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])]
)
def test_scalar_setitem_with_nested_value_length1(value):
    """Length-1 list-likes set via ``DataFrame.loc`` unpack for numeric data."""
    # https://github.com/pandas-dev/pandas/issues/46268

    # Numeric data: a length-1 list-like assigned to a scalar position is
    # unpacked to its single element.
    result = DataFrame({"A": [1, 2, 3]})
    result.loc[0, "B"] = value
    expected = DataFrame({"A": [1, 2, 3], "B": [0.0, np.nan, np.nan]})
    tm.assert_frame_equal(result, expected)

    # Object dtype: the nested value itself is stored in the cell.
    object_column = np.array([1, "a", "b"], dtype=object)
    object_frame = DataFrame({"A": [1, 2, 3], "B": object_column})
    object_frame.loc[0, "B"] = value

    is_equal = object_frame.loc[0, "B"] == value
    if isinstance(value, np.ndarray):
        # Comparing against an ndarray is elementwise; reduce to one bool.
        is_equal = is_equal.all()
    assert is_equal


@pytest.mark.parametrize(
    "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])]
)
def test_scalar_setitem_series_with_nested_value_length1(value, indexer_sli):
    """Length-1 values: only ndarrays unpack into a float Series; object keeps them."""
    # For numeric data, assigning length-1 array to scalar position gets unpacked
    # TODO this only happens in case of ndarray, should we make this consistent
    # for all list-likes? (as happens for DataFrame.(i)loc, see test above)
    numeric = Series([1.0, 2.0, 3.0])
    if not isinstance(value, np.ndarray):
        unpack_error = "setting an array element with a sequence"
        with pytest.raises(ValueError, match=unpack_error):
            indexer_sli(numeric)[0] = value
    else:
        indexer_sli(numeric)[0] = value
        tm.assert_series_equal(numeric, Series([0.0, 2.0, 3.0]))

    # Object dtype: the nested value itself is stored in the single position.
    nested = Series([1, "a", "b"], dtype=object)
    indexer_sli(nested)[0] = value

    is_equal = nested.loc[0] == value
    if isinstance(value, np.ndarray):
        # Comparing against an ndarray is elementwise; reduce to one bool.
        is_equal = is_equal.all()
    assert is_equal