Skip to content

Commit 5be91f0

Browse files
jorisvandenbossche authored and meeseeksmachine committed
Backport PR pandas-dev#47763: BUG: fix regression in Series[string] setitem setting a scalar with a mask
1 parent 3876d1e commit 5be91f0

File tree

3 files changed

+28
-0
lines changed

3 files changed

+28
-0
lines changed

doc/source/whatsnew/v1.4.4.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`)
18+
- Fixed regression in setting ``None`` or a non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
1819
-
1920

2021
.. ---------------------------------------------------------------------------

pandas/core/arrays/string_.py

+7
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas._typing import (
1818
Dtype,
1919
Scalar,
20+
npt,
2021
type_t,
2122
)
2223
from pandas.compat import pa_version_under1p01
@@ -413,6 +414,12 @@ def __setitem__(self, key, value):
413414

414415
super().__setitem__(key, value)
415416

417+
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
418+
# the super() method NDArrayBackedExtensionArray._putmask uses
419+
# np.putmask which doesn't properly handle None/pd.NA, so using the
420+
# base class implementation that uses __setitem__
421+
ExtensionArray._putmask(self, mask, value)
422+
416423
def astype(self, dtype, copy: bool = True):
417424
dtype = pandas_dtype(dtype)
418425

pandas/tests/arrays/string_/test_string.py

+20
Original file line numberDiff line numberDiff line change
@@ -553,3 +553,23 @@ def test_isin(dtype, request, fixed_now_ts):
553553
result = s.isin(["a", fixed_now_ts])
554554
expected = pd.Series([True, False, False])
555555
tm.assert_series_equal(result, expected)
556+
557+
558+
def test_setitem_scalar_with_mask_validation(dtype):
559+
# https://github.com/pandas-dev/pandas/issues/47628
560+
# setting None with a boolean mask (through _putmask) should still result
561+
# in pd.NA values in the underlying array
562+
ser = pd.Series(["a", "b", "c"], dtype=dtype)
563+
mask = np.array([False, True, False])
564+
565+
ser[mask] = None
566+
assert ser.array[1] is pd.NA
567+
568+
# setting any other non-string scalar with a mask should still raise an error
569+
ser = pd.Series(["a", "b", "c"], dtype=dtype)
570+
if type(ser.array) is pd.arrays.StringArray:
571+
msg = "Cannot set non-string value"
572+
else:
573+
msg = "Scalar must be NA or str"
574+
with pytest.raises(ValueError, match=msg):
575+
ser[mask] = 1

0 commit comments

Comments
 (0)