Skip to content

Commit 8d868db

Browse files
authored
BUG: broadcasting listlike values in Series.__setitem__ GH#44265 (#44275)
* BUG: Series[int8][:3] = range(3) unnecessary upcasting to int64 * whatsnew * GH refs * BUG: broadcasting listlike values in Series.__setitem__ GH#44265 * whatsnew * separate tests
1 parent 025f892 commit 8d868db

File tree

4 files changed

+64
-7
lines changed

4 files changed

+64
-7
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ Indexing
541541
- Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`)
542542
- Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray` or :class:`Series` or :class:`DataFrame` column backed by :class:`DatetimeArray` failing to parse these strings (:issue:`44236`)
543543
- Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`44261`)
544+
- Bug in :meth:`Series.__setitem__` with a boolean mask indexer setting a listlike value of length 1 incorrectly broadcasting that value (:issue:`44265`)
544545
-
545546

546547
Missing

pandas/core/series.py

+17
Original file line numberDiff line numberDiff line change
@@ -1096,9 +1096,26 @@ def __setitem__(self, key, value) -> None:
10961096
if com.is_bool_indexer(key):
10971097
key = check_bool_indexer(self.index, key)
10981098
key = np.asarray(key, dtype=bool)
1099+
1100+
if (
1101+
is_list_like(value)
1102+
and len(value) != len(self)
1103+
and not isinstance(value, Series)
1104+
and not is_object_dtype(self.dtype)
1105+
):
1106+
# a Series value is excluded above because it will be reindexed to
1106+
# have matching length inside the _where call below
1108+
# GH#44265
1109+
indexer = key.nonzero()[0]
1110+
self._set_values(indexer, value)
1111+
return
1112+
1113+
# otherwise with listlike other we interpret series[mask] = other
1114+
# as series[mask] = other[mask]
10991115
try:
11001116
self._where(~key, value, inplace=True)
11011117
except InvalidIndexError:
1118+
# test_where_dups
11021119
self.iloc[key] = value
11031120
return
11041121

pandas/tests/series/indexing/test_setitem.py

+40
Original file line numberDiff line numberDiff line change
@@ -1064,3 +1064,43 @@ def test_setitem_with_bool_indexer():
10641064
df.loc[[True, False, False], "a"] = 10
10651065
expected = DataFrame({"a": [10, 2, 3]})
10661066
tm.assert_frame_equal(df, expected)
1067+
1068+
1069+
@pytest.mark.parametrize("size", range(2, 6))
1070+
@pytest.mark.parametrize(
1071+
"mask", [[True, False, False, False, False], [True, False], [False]]
1072+
)
1073+
@pytest.mark.parametrize(
1074+
"item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min]
1075+
)
1076+
# Test numpy arrays, lists and tuples as the length-1 listlike
1077+
# value being set
1078+
@pytest.mark.parametrize(
1079+
"box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
1080+
)
1081+
def test_setitem_bool_indexer_dont_broadcast_length1_values(size, mask, item, box):
1082+
# GH#44265
1083+
# see also tests.series.indexing.test_where.test_broadcast
1084+
1085+
selection = np.resize(mask, size)
1086+
1087+
data = np.arange(size, dtype=float)
1088+
1089+
ser = Series(data)
1090+
1091+
if selection.sum() != 1:
1092+
msg = (
1093+
"cannot set using a list-like indexer with a different "
1094+
"length than the value"
1095+
)
1096+
with pytest.raises(ValueError, match=msg):
1097+
# GH#44265
1098+
ser[selection] = box(item)
1099+
else:
1100+
# In this corner case setting is equivalent to setting with the unboxed
1101+
# item
1102+
ser[selection] = box(item)
1103+
1104+
expected = Series(np.arange(size, dtype=float))
1105+
expected[selection] = item
1106+
tm.assert_series_equal(ser, expected)

pandas/tests/series/indexing/test_where.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def test_where_unsafe():
8888
s = Series(np.arange(10))
8989
mask = s > 5
9090

91-
msg = "cannot assign mismatch length to masked array"
91+
msg = "cannot set using a list-like indexer with a different length than the value"
9292
with pytest.raises(ValueError, match=msg):
9393
s[mask] = [5, 4, 3, 2, 1]
9494

@@ -161,13 +161,10 @@ def test_where_error():
161161
tm.assert_series_equal(s, expected)
162162

163163
# failures
164-
msg = "cannot assign mismatch length to masked array"
164+
msg = "cannot set using a list-like indexer with a different length than the value"
165165
with pytest.raises(ValueError, match=msg):
166166
s[[True, False]] = [0, 2, 3]
167-
msg = (
168-
"NumPy boolean array indexing assignment cannot assign 0 input "
169-
"values to the 1 output values where the mask is true"
170-
)
167+
171168
with pytest.raises(ValueError, match=msg):
172169
s[[True, False]] = []
173170

@@ -298,6 +295,7 @@ def test_where_setitem_invalid():
298295
"box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
299296
)
300297
def test_broadcast(size, mask, item, box):
298+
# GH#8801, GH#4195
301299
selection = np.resize(mask, size)
302300

303301
data = np.arange(size, dtype=float)
@@ -309,7 +307,8 @@ def test_broadcast(size, mask, item, box):
309307
)
310308

311309
s = Series(data)
312-
s[selection] = box(item)
310+
311+
s[selection] = item
313312
tm.assert_series_equal(s, expected)
314313

315314
s = Series(data)

0 commit comments

Comments
 (0)