Skip to content

TST: Add match=msg to all pytest.raises in pandas/tests/io/json #38705

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pandas/tests/io/json/test_json_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,11 @@ def test_to_json_categorical_index(self):
"ignore:an integer is required (got type float)*:DeprecationWarning"
)
def test_date_format_raises(self):
with pytest.raises(ValueError):
msg = (
"Trying to write with `orient='table'` and `date_format='epoch'`. Table "
"Schema requires dates to be formatted with `date_format='iso'`"
)
with pytest.raises(ValueError, match=msg):
self.df.to_json(orient="table", date_format="epoch")

# others work
Expand Down
19 changes: 5 additions & 14 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ def __str__(self) -> str:

# the same with multiple columns threw segfaults
df_mixed = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"])
with pytest.raises(OverflowError):
with pytest.raises(OverflowError, match=msg):
df_mixed.to_json()

# default_handler should resolve exceptions for non-string types
Expand Down Expand Up @@ -1259,19 +1259,10 @@ def test_to_json_large_numbers(self, bigNum):
def test_read_json_large_numbers(self, bigNum):
# GH20599

series = Series(bigNum, dtype=object, index=["articleId"])
json = '{"articleId":' + str(bigNum) + "}"
with pytest.raises(ValueError):
json = StringIO(json)
result = read_json(json)
tm.assert_series_equal(series, result)

df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
json = '{"0":{"articleId":' + str(bigNum) + "}}"
with pytest.raises(ValueError):
json = StringIO(json)
result = read_json(json)
tm.assert_frame_equal(df, result)
json = StringIO('{"articleId":' + str(bigNum) + "}")
msg = r"Value is too small|Value is too big"
with pytest.raises(ValueError, match=msg):
read_json(json)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the line for testing the string '{"0":{"articleId":' + str(bigNum) + "}}" is lost with these changes. The two cases '{"articleId":' + str(bigNum) + "}" and '{"0":{"articleId":' + str(bigNum) + "}}" are different, so I think both should be kept.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. It looks like I misread the two cases as doing the same thing twice.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed


def test_read_json_large_numbers2(self):
# GH18842
Expand Down
72 changes: 54 additions & 18 deletions pandas/tests/io/json/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,11 @@ def test_double_precision(self):
def test_invalid_double_precision(self, invalid_val):
double_input = 30.12345678901234567890
expected_exception = ValueError if isinstance(invalid_val, int) else TypeError

with pytest.raises(expected_exception):
msg = (
r"Invalid value '.*' for option 'double_precision', max is '15'|"
r"an integer is required \(got type "
)
with pytest.raises(expected_exception, match=msg):
ujson.encode(double_input, double_precision=invalid_val)

def test_encode_string_conversion2(self):
Expand Down Expand Up @@ -447,13 +450,13 @@ class O1:
decoded_input.member = O2()
decoded_input.member.member = decoded_input

with pytest.raises(OverflowError):
with pytest.raises(OverflowError, match="Maximum recursion level reached"):
ujson.encode(decoded_input)

def test_decode_jibberish(self):
jibberish = "fdsa sda v9sa fdsa"

with pytest.raises(ValueError):
msg = "Unexpected character found when decoding 'false'"
with pytest.raises(ValueError, match=msg):
ujson.decode(jibberish)

@pytest.mark.parametrize(
Expand All @@ -466,12 +469,13 @@ def test_decode_jibberish(self):
],
)
def test_decode_broken_json(self, broken_json):
with pytest.raises(ValueError):
msg = "Expected object or value"
with pytest.raises(ValueError, match=msg):
ujson.decode(broken_json)

@pytest.mark.parametrize("too_big_char", ["[", "{"])
def test_decode_depth_too_big(self, too_big_char):
with pytest.raises(ValueError):
with pytest.raises(ValueError, match="Reached object decoding depth limit"):
ujson.decode(too_big_char * (1024 * 1024))

@pytest.mark.parametrize(
Expand All @@ -485,13 +489,27 @@ def test_decode_depth_too_big(self, too_big_char):
],
)
def test_decode_bad_string(self, bad_string):
with pytest.raises(ValueError):
msg = (
"Unexpected character found when decoding|"
"Unmatched ''\"' when when decoding 'string'"
)
with pytest.raises(ValueError, match=msg):
ujson.decode(bad_string)

@pytest.mark.parametrize("broken_json", ['{{1337:""}}', '{{"key":"}', "[[[true"])
def test_decode_broken_json_leak(self, broken_json):
@pytest.mark.parametrize(
"broken_json, err_msg",
[
(
'{{1337:""}}',
"Key name of object must be 'string' when decoding 'object'",
),
('{{"key":"}', "Unmatched ''\"' when when decoding 'string'"),
("[[[true", "Unexpected character found when decoding array value (2)"),
],
)
def test_decode_broken_json_leak(self, broken_json, err_msg):
for _ in range(1000):
with pytest.raises(ValueError):
with pytest.raises(ValueError, match=re.escape(err_msg)):
ujson.decode(broken_json)

@pytest.mark.parametrize(
Expand All @@ -503,7 +521,12 @@ def test_decode_broken_json_leak(self, broken_json):
],
)
def test_decode_invalid_dict(self, invalid_dict):
with pytest.raises(ValueError):
msg = (
"Key name of object must be 'string' when decoding 'object'|"
"No ':' found when decoding object value|"
"Expected object or value"
)
with pytest.raises(ValueError, match=msg):
ujson.decode(invalid_dict)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -567,7 +590,7 @@ def test_dumps_ints_larger_than_maxsize(self, bigNum):
assert str(bigNum) == encoding

# GH20599
with pytest.raises(ValueError):
with pytest.raises(ValueError, match="Value is too big"):
assert ujson.loads(encoding) == bigNum

@pytest.mark.parametrize(
Expand Down Expand Up @@ -803,7 +826,16 @@ def test_0d_array(self):
],
)
def test_array_numpy_except(self, bad_input, exc_type, kwargs):
with pytest.raises(exc_type):
msg = (
"Cannot decode multidimensional arrays with variable length elements to "
"numpy|"
"argument must be a string, a bytes-like object or a number, not|"
"nesting not supported for object or variable length dtypes|"
r"invalid literal for int\(\) with base 10|"
"labels only supported up to 2 dimensions|"
"cannot reshape array of size"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to add such a complex match; I would just do a "match all" here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Again, I didn't want to do a "match all", so I added the expected error message to the test parameters instead. That added a lot of lines because of the line-length limit, but I think it's acceptable. Let me know what you think.

I wanted to try this, but if you really don't like it I will do it your way.

)
with pytest.raises(exc_type, match=msg):
ujson.decode(ujson.dumps(bad_input), numpy=True, **kwargs)

def test_array_numpy_labelled(self):
Expand Down Expand Up @@ -1034,7 +1066,11 @@ def test_datetime_index(self):
],
)
def test_decode_invalid_array(self, invalid_arr):
with pytest.raises(ValueError):
msg = (
"Expected object or value|Trailing data|"
"Unexpected character found when decoding array value"
)
with pytest.raises(ValueError, match=msg):
ujson.decode(invalid_arr)

@pytest.mark.parametrize("arr", [[], [31337]])
Expand All @@ -1049,18 +1085,18 @@ def test_decode_extreme_numbers(self, extreme_num):
"too_extreme_num", ["9223372036854775808", "-90223372036854775809"]
)
def test_decode_too_extreme_numbers(self, too_extreme_num):
with pytest.raises(ValueError):
with pytest.raises(ValueError, match="Value is too big|Value is too small"):
ujson.decode(too_extreme_num)

def test_decode_with_trailing_whitespaces(self):
assert {} == ujson.decode("{}\n\t ")

def test_decode_with_trailing_non_whitespaces(self):
with pytest.raises(ValueError):
with pytest.raises(ValueError, match="Trailing data"):
ujson.decode("{}\n\t a")

def test_decode_array_with_big_int(self):
with pytest.raises(ValueError):
with pytest.raises(ValueError, match="Value is too big"):
ujson.loads("[18446098363113800555]")

@pytest.mark.parametrize(
Expand Down