Skip to content

Commit db68cd5

Browse files
[backport 2.3.x] ERR (string dtype): harmonize setitem error message for python and pyarrow storage (pandas-dev#60219) (pandas-dev#60232)
(cherry picked from commit 692ea6f)
1 parent 64f9907 commit db68cd5

File tree

11 files changed

+36
-32
lines changed

11 files changed

+36
-32
lines changed

pandas/core/arrays/arrow/array.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1134,7 +1134,7 @@ def fillna(
11341134
try:
11351135
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
11361136
except pa.ArrowTypeError as err:
1137-
msg = f"Invalid value '{str(value)}' for dtype {self.dtype}"
1137+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
11381138
raise TypeError(msg) from err
11391139

11401140
try:
@@ -2126,7 +2126,7 @@ def _maybe_convert_setitem_value(self, value):
21262126
try:
21272127
value = self._box_pa(value, self._pa_array.type)
21282128
except pa.ArrowTypeError as err:
2129-
msg = f"Invalid value '{str(value)}' for dtype {self.dtype}"
2129+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
21302130
raise TypeError(msg) from err
21312131
return value
21322132

pandas/core/arrays/masked.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def _validate_setitem_value(self, value):
302302

303303
# Note: without the explicit str conversion ("!s") here, the f-string rendering raises in
304304
# py38 builds.
305-
raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}")
305+
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")
306306

307307
def __setitem__(self, key, value) -> None:
308308
key = check_array_indexer(self, key)

pandas/core/arrays/string_.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,8 @@ def _validate_scalar(self, value):
654654
return self.dtype.na_value
655655
elif not isinstance(value, str):
656656
raise TypeError(
657-
f"Cannot set non-string value '{value}' into a string array."
657+
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should be a "
658+
f"string or missing value, got '{type(value).__name__}' instead."
658659
)
659660
return value
660661

@@ -743,7 +744,9 @@ def __setitem__(self, key, value) -> None:
743744
value = self.dtype.na_value
744745
elif not isinstance(value, str):
745746
raise TypeError(
746-
f"Cannot set non-string value '{value}' into a StringArray."
747+
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should "
748+
f"be a string or missing value, got '{type(value).__name__}' "
749+
"instead."
747750
)
748751
else:
749752
if not is_array_like(value):
@@ -753,7 +756,10 @@ def __setitem__(self, key, value) -> None:
753756
# compatible, compatibility with arrow backed strings
754757
value = np.asarray(value)
755758
if len(value) and not lib.is_string_array(value, skipna=True):
756-
raise TypeError("Must provide strings.")
759+
raise TypeError(
760+
"Invalid value for dtype 'str'. Value should be a "
761+
"string or missing value (or array of those)."
762+
)
757763

758764
mask = isna(value)
759765
if mask.any():

pandas/core/arrays/string_arrow.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,10 @@ def insert(self, loc: int, item) -> ArrowStringArray:
215215
if self.dtype.na_value is np.nan and item is np.nan:
216216
item = libmissing.NA
217217
if not isinstance(item, str) and item is not libmissing.NA:
218-
raise TypeError("Scalar must be NA or str")
218+
raise TypeError(
219+
f"Invalid value '{item}' for dtype 'str'. Value should be a "
220+
f"string or missing value, got '{type(item).__name__}' instead."
221+
)
219222
return super().insert(loc, item)
220223

221224
def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
@@ -249,13 +252,19 @@ def _maybe_convert_setitem_value(self, value):
249252
if isna(value):
250253
value = None
251254
elif not isinstance(value, str):
252-
raise TypeError("Scalar must be NA or str")
255+
raise TypeError(
256+
f"Invalid value '{value}' for dtype 'str'. Value should be a "
257+
f"string or missing value, got '{type(value).__name__}' instead."
258+
)
253259
else:
254260
value = np.array(value, dtype=object, copy=True)
255261
value[isna(value)] = None
256262
for v in value:
257263
if not (v is None or isinstance(v, str)):
258-
raise TypeError("Must provide strings")
264+
raise TypeError(
265+
"Invalid value for dtype 'str'. Value should be a "
266+
"string or missing value (or array of those)."
267+
)
259268
return super()._maybe_convert_setitem_value(value)
260269

261270
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:

pandas/tests/arrays/masked/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
class TestSetitemValidation:
1010
def _check_setitem_invalid(self, arr, invalid):
11-
msg = f"Invalid value '{str(invalid)}' for dtype {arr.dtype}"
11+
msg = f"Invalid value '{invalid!s}' for dtype '{arr.dtype}'"
1212
msg = re.escape(msg)
1313
with pytest.raises(TypeError, match=msg):
1414
arr[0] = invalid

pandas/tests/arrays/string_/test_string.py

+4-13
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,11 @@ def test_none_to_nan(cls, dtype):
108108
def test_setitem_validates(cls, dtype):
109109
arr = cls._from_sequence(["a", "b"], dtype=dtype)
110110

111-
if dtype.storage == "python":
112-
msg = "Cannot set non-string value '10' into a StringArray."
113-
else:
114-
msg = "Scalar must be NA or str"
111+
msg = "Invalid value '10' for dtype 'str"
115112
with pytest.raises(TypeError, match=msg):
116113
arr[0] = 10
117114

118-
msg = "Must provide strings"
115+
msg = "Invalid value for dtype 'str"
119116
with pytest.raises(TypeError, match=msg):
120117
arr[:] = np.array([1, 2])
121118

@@ -510,10 +507,7 @@ def test_fillna_args(dtype):
510507
expected = pd.array(["a", "b"], dtype=dtype)
511508
tm.assert_extension_array_equal(res, expected)
512509

513-
if dtype.storage == "pyarrow":
514-
msg = "Invalid value '1' for dtype str"
515-
else:
516-
msg = "Cannot set non-string value '1' into a StringArray."
510+
msg = "Invalid value '1' for dtype 'str"
517511
with pytest.raises(TypeError, match=msg):
518512
arr.fillna(value=1)
519513

@@ -754,10 +748,7 @@ def test_setitem_scalar_with_mask_validation(dtype):
754748

755749
# for other non-string we should also raise an error
756750
ser = pd.Series(["a", "b", "c"], dtype=dtype)
757-
if dtype.storage == "python":
758-
msg = "Cannot set non-string value"
759-
else:
760-
msg = "Scalar must be NA or str"
751+
msg = "Invalid value '1' for dtype 'str"
761752
with pytest.raises(TypeError, match=msg):
762753
ser[mask] = 1
763754

pandas/tests/frame/indexing/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1340,7 +1340,7 @@ def test_setting_mismatched_na_into_nullable_fails(
13401340
r"timedelta64\[ns\] cannot be converted to (Floating|Integer)Dtype",
13411341
r"datetime64\[ns\] cannot be converted to (Floating|Integer)Dtype",
13421342
"'values' contains non-numeric NA",
1343-
r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}",
1343+
r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'",
13441344
]
13451345
)
13461346
with pytest.raises(TypeError, match=msg):

pandas/tests/frame/indexing/test_where.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -976,7 +976,7 @@ def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype):
976976

977977
mask = np.array([True, True, False], ndmin=obj.ndim).T
978978

979-
msg = r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}"
979+
msg = r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'"
980980

981981
for null in tm.NP_NAT_OBJECTS + [pd.NaT]:
982982
# NaT is an NA value that we should *not* cast to pd.NA dtype
@@ -1091,7 +1091,7 @@ def test_where_int_overflow(replacement, using_infer_string):
10911091
df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]])
10921092
if using_infer_string and replacement not in (None, "snake"):
10931093
with pytest.raises(
1094-
TypeError, match="Cannot set non-string value|Scalar must be NA or str"
1094+
TypeError, match=f"Invalid value '{replacement}' for dtype 'str'"
10951095
):
10961096
df.where(pd.notnull(df), replacement)
10971097
return

pandas/tests/indexing/test_indexing.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -728,9 +728,7 @@ def run_tests(df, rhs, right_loc, right_iloc):
728728
right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
729729
right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
730730
if using_infer_string:
731-
with pytest.raises(
732-
TypeError, match="Must provide strings|Scalar must be NA or str"
733-
):
731+
with pytest.raises(TypeError, match="Invalid value"):
734732
with tm.assert_produces_warning(
735733
FutureWarning, match="incompatible dtype"
736734
):

pandas/tests/indexing/test_loc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1292,7 +1292,7 @@ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string
12921292
# assigning with loc/iloc attempts to set the values inplace, which
12931293
# in this case is successful
12941294
if using_infer_string:
1295-
with pytest.raises(TypeError, match="Must provide strings"):
1295+
with pytest.raises(TypeError, match="Invalid value"):
12961296
result.loc[result.index, "A"] = [float(x) for x in col_data]
12971297
else:
12981298
result.loc[result.index, "A"] = [float(x) for x in col_data]

pandas/tests/series/indexing/test_setitem.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,7 @@ def test_index_where(self, obj, key, expected, warn, val, using_infer_string):
888888
mask[key] = True
889889

890890
if using_infer_string and obj.dtype == object:
891-
with pytest.raises(TypeError, match="Scalar must"):
891+
with pytest.raises(TypeError, match="Invalid value"):
892892
Index(obj).where(~mask, val)
893893
else:
894894
res = Index(obj).where(~mask, val)
@@ -901,7 +901,7 @@ def test_index_putmask(self, obj, key, expected, warn, val, using_infer_string):
901901
mask[key] = True
902902

903903
if using_infer_string and obj.dtype == object:
904-
with pytest.raises(TypeError, match="Scalar must"):
904+
with pytest.raises(TypeError, match="Invalid value"):
905905
Index(obj).putmask(mask, val)
906906
else:
907907
res = Index(obj).putmask(mask, val)

0 commit comments

Comments
 (0)