Skip to content

[backport 2.3.x] ERR (string dtype): harmonize setitem error message for python and pyarrow storage (#60219) #60232

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
4 changes: 2 additions & 2 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ def fillna(
try:
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
except pa.ArrowTypeError as err:
msg = f"Invalid value '{str(value)}' for dtype {self.dtype}"
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
raise TypeError(msg) from err

try:
Expand Down Expand Up @@ -2126,7 +2126,7 @@ def _maybe_convert_setitem_value(self, value):
try:
value = self._box_pa(value, self._pa_array.type)
except pa.ArrowTypeError as err:
msg = f"Invalid value '{str(value)}' for dtype {self.dtype}"
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
raise TypeError(msg) from err
return value

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def _validate_setitem_value(self, value):

# Note: without the explicit string conversion here (``str(value)`` or the
# equivalent ``!s`` conversion), the f-string rendering raises in py38 builds.
raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}")
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")

def __setitem__(self, key, value) -> None:
key = check_array_indexer(self, key)
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,8 @@ def _validate_scalar(self, value):
return self.dtype.na_value
elif not isinstance(value, str):
raise TypeError(
f"Cannot set non-string value '{value}' into a string array."
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should be a "
f"string or missing value, got '{type(value).__name__}' instead."
)
return value

Expand Down Expand Up @@ -743,7 +744,9 @@ def __setitem__(self, key, value) -> None:
value = self.dtype.na_value
elif not isinstance(value, str):
raise TypeError(
f"Cannot set non-string value '{value}' into a StringArray."
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should "
f"be a string or missing value, got '{type(value).__name__}' "
"instead."
)
else:
if not is_array_like(value):
Expand All @@ -753,7 +756,10 @@ def __setitem__(self, key, value) -> None:
# compatible, compatibility with arrow backed strings
value = np.asarray(value)
if len(value) and not lib.is_string_array(value, skipna=True):
raise TypeError("Must provide strings.")
raise TypeError(
"Invalid value for dtype 'str'. Value should be a "
"string or missing value (or array of those)."
)

mask = isna(value)
if mask.any():
Expand Down
15 changes: 12 additions & 3 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,10 @@ def insert(self, loc: int, item) -> ArrowStringArray:
if self.dtype.na_value is np.nan and item is np.nan:
item = libmissing.NA
if not isinstance(item, str) and item is not libmissing.NA:
raise TypeError("Scalar must be NA or str")
raise TypeError(
f"Invalid value '{item}' for dtype 'str'. Value should be a "
f"string or missing value, got '{type(item).__name__}' instead."
)
return super().insert(loc, item)

def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
Expand Down Expand Up @@ -249,13 +252,19 @@ def _maybe_convert_setitem_value(self, value):
if isna(value):
value = None
elif not isinstance(value, str):
raise TypeError("Scalar must be NA or str")
raise TypeError(
f"Invalid value '{value}' for dtype 'str'. Value should be a "
f"string or missing value, got '{type(value).__name__}' instead."
)
else:
value = np.array(value, dtype=object, copy=True)
value[isna(value)] = None
for v in value:
if not (v is None or isinstance(v, str)):
raise TypeError("Must provide strings")
raise TypeError(
"Invalid value for dtype 'str'. Value should be a "
"string or missing value (or array of those)."
)
return super()._maybe_convert_setitem_value(value)

def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/masked/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class TestSetitemValidation:
def _check_setitem_invalid(self, arr, invalid):
msg = f"Invalid value '{str(invalid)}' for dtype {arr.dtype}"
msg = f"Invalid value '{invalid!s}' for dtype '{arr.dtype}'"
msg = re.escape(msg)
with pytest.raises(TypeError, match=msg):
arr[0] = invalid
Expand Down
17 changes: 4 additions & 13 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,11 @@ def test_none_to_nan(cls, dtype):
def test_setitem_validates(cls, dtype):
arr = cls._from_sequence(["a", "b"], dtype=dtype)

if dtype.storage == "python":
msg = "Cannot set non-string value '10' into a StringArray."
else:
msg = "Scalar must be NA or str"
msg = "Invalid value '10' for dtype 'str"
with pytest.raises(TypeError, match=msg):
arr[0] = 10

msg = "Must provide strings"
msg = "Invalid value for dtype 'str"
with pytest.raises(TypeError, match=msg):
arr[:] = np.array([1, 2])

Expand Down Expand Up @@ -510,10 +507,7 @@ def test_fillna_args(dtype):
expected = pd.array(["a", "b"], dtype=dtype)
tm.assert_extension_array_equal(res, expected)

if dtype.storage == "pyarrow":
msg = "Invalid value '1' for dtype str"
else:
msg = "Cannot set non-string value '1' into a StringArray."
msg = "Invalid value '1' for dtype 'str"
with pytest.raises(TypeError, match=msg):
arr.fillna(value=1)

Expand Down Expand Up @@ -754,10 +748,7 @@ def test_setitem_scalar_with_mask_validation(dtype):

# for other non-string we should also raise an error
ser = pd.Series(["a", "b", "c"], dtype=dtype)
if dtype.storage == "python":
msg = "Cannot set non-string value"
else:
msg = "Scalar must be NA or str"
msg = "Invalid value '1' for dtype 'str"
with pytest.raises(TypeError, match=msg):
ser[mask] = 1

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1340,7 +1340,7 @@ def test_setting_mismatched_na_into_nullable_fails(
r"timedelta64\[ns\] cannot be converted to (Floating|Integer)Dtype",
r"datetime64\[ns\] cannot be converted to (Floating|Integer)Dtype",
"'values' contains non-numeric NA",
r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}",
r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'",
]
)
with pytest.raises(TypeError, match=msg):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype):

mask = np.array([True, True, False], ndmin=obj.ndim).T

msg = r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}"
msg = r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'"

for null in tm.NP_NAT_OBJECTS + [pd.NaT]:
# NaT is an NA value that we should *not* cast to pd.NA dtype
Expand Down Expand Up @@ -1091,7 +1091,7 @@ def test_where_int_overflow(replacement, using_infer_string):
df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]])
if using_infer_string and replacement not in (None, "snake"):
with pytest.raises(
TypeError, match="Cannot set non-string value|Scalar must be NA or str"
TypeError, match=f"Invalid value '{replacement}' for dtype 'str'"
):
df.where(pd.notnull(df), replacement)
return
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,9 +728,7 @@ def run_tests(df, rhs, right_loc, right_iloc):
right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
if using_infer_string:
with pytest.raises(
TypeError, match="Must provide strings|Scalar must be NA or str"
):
with pytest.raises(TypeError, match="Invalid value"):
with tm.assert_produces_warning(
FutureWarning, match="incompatible dtype"
):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1292,7 +1292,7 @@ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string
# assigning with loc/iloc attempts to set the values inplace, which
# in this case is successful
if using_infer_string:
with pytest.raises(TypeError, match="Must provide strings"):
with pytest.raises(TypeError, match="Invalid value"):
result.loc[result.index, "A"] = [float(x) for x in col_data]
else:
result.loc[result.index, "A"] = [float(x) for x in col_data]
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,7 @@ def test_index_where(self, obj, key, expected, warn, val, using_infer_string):
mask[key] = True

if using_infer_string and obj.dtype == object:
with pytest.raises(TypeError, match="Scalar must"):
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).where(~mask, val)
else:
res = Index(obj).where(~mask, val)
Expand All @@ -901,7 +901,7 @@ def test_index_putmask(self, obj, key, expected, warn, val, using_infer_string):
mask[key] = True

if using_infer_string and obj.dtype == object:
with pytest.raises(TypeError, match="Scalar must"):
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).putmask(mask, val)
else:
res = Index(obj).putmask(mask, val)
Expand Down
Loading