Skip to content

Commit 692ea6f

Browse files
ERR (string dtype): harmonize setitem error message for python and pyarrow storage (#60219)
1 parent 0937c95 commit 692ea6f

File tree

10 files changed: 35 additions (+35) and 29 deletions (-29).

pandas/core/arrays/arrow/array.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ def fillna(
11451145
try:
11461146
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
11471147
except pa.ArrowTypeError as err:
1148-
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
1148+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
11491149
raise TypeError(msg) from err
11501150

11511151
try:
@@ -2136,7 +2136,7 @@ def _maybe_convert_setitem_value(self, value):
21362136
try:
21372137
value = self._box_pa(value, self._pa_array.type)
21382138
except pa.ArrowTypeError as err:
2139-
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
2139+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
21402140
raise TypeError(msg) from err
21412141
return value
21422142

pandas/core/arrays/masked.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ def _validate_setitem_value(self, value):
286286

287287
# Note: without the "str" here, the f-string rendering raises in
288288
# py38 builds.
289-
raise TypeError(f"Invalid value '{value!s}' for dtype {self.dtype}")
289+
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")
290290

291291
def __setitem__(self, key, value) -> None:
292292
key = check_array_indexer(self, key)

pandas/core/arrays/string_.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,8 @@ def _validate_scalar(self, value):
652652
return self.dtype.na_value
653653
elif not isinstance(value, str):
654654
raise TypeError(
655-
f"Cannot set non-string value '{value}' into a string array."
655+
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should be a "
656+
f"string or missing value, got '{type(value).__name__}' instead."
656657
)
657658
return value
658659

@@ -743,7 +744,9 @@ def __setitem__(self, key, value) -> None:
743744
value = self.dtype.na_value
744745
elif not isinstance(value, str):
745746
raise TypeError(
746-
f"Cannot set non-string value '{value}' into a StringArray."
747+
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should "
748+
f"be a string or missing value, got '{type(value).__name__}' "
749+
"instead."
747750
)
748751
else:
749752
if not is_array_like(value):
@@ -753,7 +756,10 @@ def __setitem__(self, key, value) -> None:
753756
# compatible, compatibility with arrow backed strings
754757
value = np.asarray(value)
755758
if len(value) and not lib.is_string_array(value, skipna=True):
756-
raise TypeError("Must provide strings.")
759+
raise TypeError(
760+
"Invalid value for dtype 'str'. Value should be a "
761+
"string or missing value (or array of those)."
762+
)
757763

758764
mask = isna(value)
759765
if mask.any():

pandas/core/arrays/string_arrow.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,10 @@ def insert(self, loc: int, item) -> ArrowStringArray:
223223
if self.dtype.na_value is np.nan and item is np.nan:
224224
item = libmissing.NA
225225
if not isinstance(item, str) and item is not libmissing.NA:
226-
raise TypeError("Scalar must be NA or str")
226+
raise TypeError(
227+
f"Invalid value '{item}' for dtype 'str'. Value should be a "
228+
f"string or missing value, got '{type(item).__name__}' instead."
229+
)
227230
return super().insert(loc, item)
228231

229232
def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
@@ -255,13 +258,19 @@ def _maybe_convert_setitem_value(self, value):
255258
if isna(value):
256259
value = None
257260
elif not isinstance(value, str):
258-
raise TypeError("Scalar must be NA or str")
261+
raise TypeError(
262+
f"Invalid value '{value}' for dtype 'str'. Value should be a "
263+
f"string or missing value, got '{type(value).__name__}' instead."
264+
)
259265
else:
260266
value = np.array(value, dtype=object, copy=True)
261267
value[isna(value)] = None
262268
for v in value:
263269
if not (v is None or isinstance(v, str)):
264-
raise TypeError("Must provide strings")
270+
raise TypeError(
271+
"Invalid value for dtype 'str'. Value should be a "
272+
"string or missing value (or array of those)."
273+
)
265274
return super()._maybe_convert_setitem_value(value)
266275

267276
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:

pandas/tests/arrays/masked/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
class TestSetitemValidation:
1010
def _check_setitem_invalid(self, arr, invalid):
11-
msg = f"Invalid value '{invalid!s}' for dtype {arr.dtype}"
11+
msg = f"Invalid value '{invalid!s}' for dtype '{arr.dtype}'"
1212
msg = re.escape(msg)
1313
with pytest.raises(TypeError, match=msg):
1414
arr[0] = invalid

pandas/tests/arrays/string_/test_string.py

+4-13
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,11 @@ def test_none_to_nan(cls, dtype):
109109
def test_setitem_validates(cls, dtype):
110110
arr = cls._from_sequence(["a", "b"], dtype=dtype)
111111

112-
if dtype.storage == "python":
113-
msg = "Cannot set non-string value '10' into a StringArray."
114-
else:
115-
msg = "Scalar must be NA or str"
112+
msg = "Invalid value '10' for dtype 'str"
116113
with pytest.raises(TypeError, match=msg):
117114
arr[0] = 10
118115

119-
msg = "Must provide strings"
116+
msg = "Invalid value for dtype 'str"
120117
with pytest.raises(TypeError, match=msg):
121118
arr[:] = np.array([1, 2])
122119

@@ -508,10 +505,7 @@ def test_fillna_args(dtype):
508505
expected = pd.array(["a", "b"], dtype=dtype)
509506
tm.assert_extension_array_equal(res, expected)
510507

511-
if dtype.storage == "pyarrow":
512-
msg = "Invalid value '1' for dtype str"
513-
else:
514-
msg = "Cannot set non-string value '1' into a StringArray."
508+
msg = "Invalid value '1' for dtype 'str"
515509
with pytest.raises(TypeError, match=msg):
516510
arr.fillna(value=1)
517511

@@ -727,10 +721,7 @@ def test_setitem_scalar_with_mask_validation(dtype):
727721

728722
# for other non-string we should also raise an error
729723
ser = pd.Series(["a", "b", "c"], dtype=dtype)
730-
if dtype.storage == "python":
731-
msg = "Cannot set non-string value"
732-
else:
733-
msg = "Scalar must be NA or str"
724+
msg = "Invalid value '1' for dtype 'str"
734725
with pytest.raises(TypeError, match=msg):
735726
ser[mask] = 1
736727

pandas/tests/frame/indexing/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1274,7 +1274,7 @@ def test_setting_mismatched_na_into_nullable_fails(
12741274
r"timedelta64\[ns\] cannot be converted to (Floating|Integer)Dtype",
12751275
r"datetime64\[ns\] cannot be converted to (Floating|Integer)Dtype",
12761276
"'values' contains non-numeric NA",
1277-
r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}",
1277+
r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'",
12781278
]
12791279
)
12801280
with pytest.raises(TypeError, match=msg):

pandas/tests/frame/indexing/test_where.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -931,7 +931,7 @@ def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype):
931931

932932
mask = np.array([True, True, False], ndmin=obj.ndim).T
933933

934-
msg = r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}"
934+
msg = r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'"
935935

936936
for null in tm.NP_NAT_OBJECTS + [pd.NaT]:
937937
# NaT is an NA value that we should *not* cast to pd.NA dtype
@@ -1030,7 +1030,7 @@ def test_where_int_overflow(replacement, using_infer_string):
10301030
df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]])
10311031
if using_infer_string and replacement not in (None, "snake"):
10321032
with pytest.raises(
1033-
TypeError, match="Cannot set non-string value|Scalar must be NA or str"
1033+
TypeError, match=f"Invalid value '{replacement}' for dtype 'str'"
10341034
):
10351035
df.where(pd.notnull(df), replacement)
10361036
return

pandas/tests/indexing/test_loc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1230,7 +1230,7 @@ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string
12301230
# assigning with loc/iloc attempts to set the values inplace, which
12311231
# in this case is successful
12321232
if using_infer_string:
1233-
with pytest.raises(TypeError, match="Must provide strings"):
1233+
with pytest.raises(TypeError, match="Invalid value"):
12341234
result.loc[result.index, "A"] = [float(x) for x in col_data]
12351235
else:
12361236
result.loc[result.index, "A"] = [float(x) for x in col_data]

pandas/tests/series/indexing/test_setitem.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -864,7 +864,7 @@ def test_index_where(self, obj, key, expected, raises, val, using_infer_string):
864864
mask[key] = True
865865

866866
if using_infer_string and obj.dtype == object:
867-
with pytest.raises(TypeError, match="Scalar must"):
867+
with pytest.raises(TypeError, match="Invalid value"):
868868
Index(obj).where(~mask, val)
869869
else:
870870
res = Index(obj).where(~mask, val)
@@ -877,7 +877,7 @@ def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string
877877
mask[key] = True
878878

879879
if using_infer_string and obj.dtype == object:
880-
with pytest.raises(TypeError, match="Scalar must"):
880+
with pytest.raises(TypeError, match="Invalid value"):
881881
Index(obj).putmask(mask, val)
882882
else:
883883
res = Index(obj).putmask(mask, val)

0 commit comments

Comments
 (0)