diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 06e7c96277fd6..215d663e68d8f 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -439,7 +439,11 @@ def test_to_json_categorical_index(self): "ignore:an integer is required (got type float)*:DeprecationWarning" ) def test_date_format_raises(self): - with pytest.raises(ValueError): + msg = ( + "Trying to write with `orient='table'` and `date_format='epoch'`. Table " + "Schema requires dates to be formatted with `date_format='iso'`" + ) + with pytest.raises(ValueError, match=msg): self.df.to_json(orient="table", date_format="epoch") # others work diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 8f9b6699503ee..851e64a3f2478 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -591,7 +591,7 @@ def __str__(self) -> str: # the same with multiple columns threw segfaults df_mixed = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"]) - with pytest.raises(OverflowError): + with pytest.raises(OverflowError, match=msg): df_mixed.to_json() # default_handler should resolve exceptions for non-string types @@ -1259,19 +1259,14 @@ def test_to_json_large_numbers(self, bigNum): def test_read_json_large_numbers(self, bigNum): # GH20599 - series = Series(bigNum, dtype=object, index=["articleId"]) - json = '{"articleId":' + str(bigNum) + "}" - with pytest.raises(ValueError): - json = StringIO(json) - result = read_json(json) - tm.assert_series_equal(series, result) + json = StringIO('{"articleId":' + str(bigNum) + "}") + msg = r"Value is too small|Value is too big" + with pytest.raises(ValueError, match=msg): + read_json(json) - df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0]) - json = '{"0":{"articleId":' + str(bigNum) + "}}" - with pytest.raises(ValueError): - json = StringIO(json) - result = read_json(json) - tm.assert_frame_equal(df, result) + json = StringIO('{"0":{"articleId":' + str(bigNum) + "}}") + with pytest.raises(ValueError, match=msg): + read_json(json) def test_read_json_large_numbers2(self): # GH18842 diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index ced0d540f33ef..74adb397d91f4 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -241,8 +241,11 @@ def test_double_precision(self): def test_invalid_double_precision(self, invalid_val): double_input = 30.12345678901234567890 expected_exception = ValueError if isinstance(invalid_val, int) else TypeError - - with pytest.raises(expected_exception): + msg = ( + r"Invalid value '.*' for option 'double_precision', max is '15'|" + r"an integer is required \(got type " + ) + with pytest.raises(expected_exception, match=msg): ujson.encode(double_input, double_precision=invalid_val) def test_encode_string_conversion2(self): @@ -447,13 +450,13 @@ class O1: decoded_input.member = O2() decoded_input.member.member = decoded_input - with pytest.raises(OverflowError): + with pytest.raises(OverflowError, match="Maximum recursion level reached"): ujson.encode(decoded_input) def test_decode_jibberish(self): jibberish = "fdsa sda v9sa fdsa" - - with pytest.raises(ValueError): + msg = "Unexpected character found when decoding 'false'" + with pytest.raises(ValueError, match=msg): ujson.decode(jibberish) @pytest.mark.parametrize( @@ -466,12 +469,13 @@ def test_decode_jibberish(self): ], ) def test_decode_broken_json(self, broken_json): - with pytest.raises(ValueError): + msg = "Expected object or value" + with pytest.raises(ValueError, match=msg): ujson.decode(broken_json) @pytest.mark.parametrize("too_big_char", ["[", "{"]) def test_decode_depth_too_big(self, too_big_char): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Reached object decoding depth limit"): ujson.decode(too_big_char * (1024 * 1024)) @pytest.mark.parametrize( @@ -485,13 +489,27 @@ def test_decode_depth_too_big(self, too_big_char): ], ) def test_decode_bad_string(self, bad_string): - with pytest.raises(ValueError): + msg = ( + "Unexpected character found when decoding|" + "Unmatched ''\"' when when decoding 'string'" + ) + with pytest.raises(ValueError, match=msg): ujson.decode(bad_string) - @pytest.mark.parametrize("broken_json", ['{{1337:""}}', '{{"key":"}', "[[[true"]) - def test_decode_broken_json_leak(self, broken_json): + @pytest.mark.parametrize( + "broken_json, err_msg", + [ + ( + '{{1337:""}}', + "Key name of object must be 'string' when decoding 'object'", + ), + ('{{"key":"}', "Unmatched ''\"' when when decoding 'string'"), + ("[[[true", "Unexpected character found when decoding array value (2)"), + ], + ) + def test_decode_broken_json_leak(self, broken_json, err_msg): for _ in range(1000): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=re.escape(err_msg)): ujson.decode(broken_json) @pytest.mark.parametrize( @@ -503,7 +521,12 @@ def test_decode_broken_json_leak(self, broken_json): ], ) def test_decode_invalid_dict(self, invalid_dict): - with pytest.raises(ValueError): + msg = ( + "Key name of object must be 'string' when decoding 'object'|" + "No ':' found when decoding object value|" + "Expected object or value" + ) + with pytest.raises(ValueError, match=msg): ujson.decode(invalid_dict) @pytest.mark.parametrize( @@ -567,7 +590,7 @@ def test_dumps_ints_larger_than_maxsize(self, bigNum): assert str(bigNum) == encoding # GH20599 - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Value is too big"): assert ujson.loads(encoding) == bigNum @pytest.mark.parametrize( @@ -789,21 +812,70 @@ def test_0d_array(self): ujson.encode(np.array(1)) @pytest.mark.parametrize( - "bad_input,exc_type,kwargs", + "bad_input,exc_type,err_msg,kwargs", [ - ([{}, []], ValueError, {}), - ([42, None], TypeError, {}), - ([["a"], 42], ValueError, {}), - ([42, {}, "a"], TypeError, {}), - ([42, ["a"], 42], ValueError, {}), - (["a", "b", [], "c"], ValueError, {}), - ([{"a": "b"}], ValueError, {"labelled": True}), - ({"a": {"b": {"c": 42}}}, ValueError, {"labelled": True}), - ([{"a": 42, "b": 23}, {"c": 17}], ValueError, {"labelled": True}), + ( + [{}, []], + ValueError, + "nesting not supported for object or variable length dtypes", + {}, + ), + ( + [42, None], + TypeError, + "int() argument must be a string, a bytes-like object or a number, " + "not 'NoneType'", + {}, + ), + ( + [["a"], 42], + ValueError, + "Cannot decode multidimensional arrays with variable length elements " + "to numpy", + {}, + ), + ( + [42, {}, "a"], + TypeError, + "int() argument must be a string, a bytes-like object or a number, " + "not 'dict'", + {}, + ), + ( + [42, ["a"], 42], + ValueError, + "invalid literal for int() with base 10: 'a'", + {}, + ), + ( + ["a", "b", [], "c"], + ValueError, + "nesting not supported for object or variable length dtypes", + {}, + ), + ( + [{"a": "b"}], + ValueError, + "Cannot decode multidimensional arrays with variable length elements " + "to numpy", + {"labelled": True}, + ), + ( + {"a": {"b": {"c": 42}}}, + ValueError, + "labels only supported up to 2 dimensions", + {"labelled": True}, + ), + ( + [{"a": 42, "b": 23}, {"c": 17}], + ValueError, + "cannot reshape array of size 3 into shape (2,1)", + {"labelled": True}, + ), ], ) - def test_array_numpy_except(self, bad_input, exc_type, kwargs): - with pytest.raises(exc_type): + def test_array_numpy_except(self, bad_input, exc_type, err_msg, kwargs): + with pytest.raises(exc_type, match=re.escape(err_msg)): ujson.decode(ujson.dumps(bad_input), numpy=True, **kwargs) def test_array_numpy_labelled(self): @@ -1034,7 +1106,11 @@ def test_datetime_index(self): ], ) def test_decode_invalid_array(self, invalid_arr): - with pytest.raises(ValueError): + msg = ( + "Expected object or value|Trailing data|" + "Unexpected character found when decoding array value" + ) + with pytest.raises(ValueError, match=msg): ujson.decode(invalid_arr) @pytest.mark.parametrize("arr", [[], [31337]]) @@ -1049,18 +1125,18 @@ def test_decode_extreme_numbers(self, extreme_num): "too_extreme_num", ["9223372036854775808", "-90223372036854775809"] ) def test_decode_too_extreme_numbers(self, too_extreme_num): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Value is too big|Value is too small"): ujson.decode(too_extreme_num) def test_decode_with_trailing_whitespaces(self): assert {} == ujson.decode("{}\n\t ") def test_decode_with_trailing_non_whitespaces(self): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Trailing data"): ujson.decode("{}\n\t a") def test_decode_array_with_big_int(self): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Value is too big"): ujson.loads("[18446098363113800555]") @pytest.mark.parametrize(