Skip to content

Commit 1b1dd36

Browse files
BUG: fix regression in Series[string] setitem setting a scalar with a mask (#47763)
* BUG: fix regression in Series[string] setitem setting a scalar with a mask * expand test for non-string value
1 parent efd15b7 commit 1b1dd36

File tree

3 files changed

+28
-0
lines changed

3 files changed

+28
-0
lines changed

doc/source/whatsnew/v1.4.4.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`)
18+
- Fixed regression in setting ``None`` or a non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
1819
-
1920

2021
.. ---------------------------------------------------------------------------

pandas/core/arrays/string_.py

+7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas._typing import (
1515
Dtype,
1616
Scalar,
17+
npt,
1718
type_t,
1819
)
1920
from pandas.compat import pa_version_under1p01
@@ -410,6 +411,12 @@ def __setitem__(self, key, value):
410411

411412
super().__setitem__(key, value)
412413

414+
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
415+
# the super() method NDArrayBackedExtensionArray._putmask uses
416+
# np.putmask which doesn't properly handle None/pd.NA, so using the
417+
# base class implementation that uses __setitem__
418+
ExtensionArray._putmask(self, mask, value)
419+
413420
def astype(self, dtype, copy: bool = True):
414421
dtype = pandas_dtype(dtype)
415422

pandas/tests/arrays/string_/test_string.py

+20
Original file line numberDiff line numberDiff line change
@@ -588,3 +588,23 @@ def test_isin(dtype, fixed_now_ts):
588588
result = s.isin(["a", fixed_now_ts])
589589
expected = pd.Series([True, False, False])
590590
tm.assert_series_equal(result, expected)
591+
592+
593+
def test_setitem_scalar_with_mask_validation(dtype):
594+
# https://github.com/pandas-dev/pandas/issues/47628
595+
# setting None with a boolean mask (through _putmask) should still result
596+
# in pd.NA values in the underlying array
597+
ser = pd.Series(["a", "b", "c"], dtype=dtype)
598+
mask = np.array([False, True, False])
599+
600+
ser[mask] = None
601+
assert ser.array[1] is pd.NA
602+
603+
# for other non-string values we should also raise an error
604+
ser = pd.Series(["a", "b", "c"], dtype=dtype)
605+
if type(ser.array) is pd.arrays.StringArray:
606+
msg = "Cannot set non-string value"
607+
else:
608+
msg = "Scalar must be NA or str"
609+
with pytest.raises(ValueError, match=msg):
610+
ser[mask] = 1

0 commit comments

Comments
 (0)