Skip to content

Commit 00923c2

Browse files
vmaurinVincent Maurin
and
Vincent Maurin
authored
Simplify the uri format validation regexp (#414)
The previous regexp relied heavily on alternation (`|`), which led to StackOverflowError on long URIs (Java's Pattern class makes heavy use of recursion). I have reworked the regexp directly from the RFC, taking some shortcuts so that it mostly validates which characters are allowed in each part of the URI. Refs #413. Co-authored-by: Vincent Maurin <[email protected]>
1 parent f4809e5 commit 00923c2

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

src/main/java/com/networknt/schema/JsonMetaSchema.java

+36-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,42 @@ static PatternFormat pattern(String name, String regex) {
4747
"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"));
4848
COMMON_BUILTIN_FORMATS.add(pattern("ipv6",
4949
"^\\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:)))(%.+)?\\s*$"));
50-
COMMON_BUILTIN_FORMATS.add(pattern("uri", "^([A-Za-z][A-Za-z0-9+.-]+):(\\/\\/([^@]+@)?([A-Za-z0-9.\\-_~]+)(:\\d+)?)?((?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@])+(?:\\/(?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@])*)*|(?:\\/(?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@])+)*)?(\\?(?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@]|[/?])*)?(\\#(?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@]|[/?])*)?$"));
50+
51+
// From RFC 3986
52+
// ALPHA [A-Za-z]
53+
// DIGIT [0-9]
54+
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
55+
// => [A-Za-z][A-Za-z0-9+.-]*
56+
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
57+
// => [A-Za-z0-9._~\-]
58+
// gen-delims [:/?#\[\]@]
59+
// sub-delims [!$&'()*+,;=]
60+
// reserved = gen-delims / sub-delims
61+
// => [:/?#\[\]@!$&'()*+,;=]
62+
// pct-encoded = "%" HEXDIG HEXDIG
63+
// => [A-Za-z0-9%] (approximation)
64+
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
65+
// => [A-Za-z0-9._~\-%!$&'()*+,;=:@]
66+
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
67+
// => [A-Za-z0-9._~\-%!$&'()*+,;=:]*
68+
// host = IP-literal / IPv4address / reg-name
69+
// => [A-Za-z0-9._~\-!$&'()*+,;=%:\[\]]* (approximation)
70+
// port = *DIGIT
71+
// => [0-9]*
72+
// authority = [ userinfo "@" ] host [ ":" port ]
73+
// => ([A-Za-z0-9._~\-%!$&'()*+,;=:]*@)?[A-Za-z0-9._~\-!$&'()*+,;=%:\[\]]*(:[0-9]*)?
74+
// hier-part = "//" authority path-abempty
75+
// / path-absolute
76+
// / path-rootless
77+
// / path-empty
78+
// => (\/\/([A-Za-z0-9._~\-%!$&'()*+,;=:]*@)?[A-Za-z0-9._~\-!$&'()*+,;=%:\[\]]*(:[0-9]*)?)?[A-Za-z0-9._~\-%!$&'()*+,;=:@\/]* (approximation)
79+
// query = *( pchar / "/" / "?" )
80+
// => [A-Za-z0-9._~\-%!$&'()*+,;=:@\/?]*
81+
// fragment = *( pchar / "/" / "?" )
82+
// => [A-Za-z0-9._~\-%!$&'()*+,;=:@\/?]*
83+
// uri = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
84+
COMMON_BUILTIN_FORMATS.add(pattern("uri",
85+
"^[A-Za-z][A-Za-z0-9+.-]*:(\\/\\/([A-Za-z0-9._~\\-%!$&'()*+,;=:]*@)?[A-Za-z0-9._~\\-!$&'()*+,;=%:\\[\\]]*(:[0-9]*)?)?[A-Za-z0-9._~\\-%!$&'()*+,;=:@\\/]*([?][A-Za-z0-9._~\\-%!$&'()*+,;=:@\\/?]*)?([#][A-Za-z0-9._~\\-%!$&'()*+,;=:@\\/?]*)?"));
5186
COMMON_BUILTIN_FORMATS.add(pattern("color",
5287
"(#?([0-9A-Fa-f]{3,6})\\b)|(aqua)|(black)|(blue)|(fuchsia)|(gray)|(green)|(lime)|(maroon)|(navy)|(olive)|(orange)|(purple)|(red)|(silver)|(teal)|(white)|(yellow)|(rgb\\(\\s*\\b([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\b\\s*,\\s*\\b([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\b\\s*,\\s*\\b([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\b\\s*\\))|(rgb\\(\\s*(\\d?\\d%|100%)+\\s*,\\s*(\\d?\\d%|100%)+\\s*,\\s*(\\d?\\d%|100%)+\\s*\\))"));
5388
COMMON_BUILTIN_FORMATS.add(pattern("hostname",

src/test/resources/draft4/optional/format.json

+25
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,31 @@
211211
"data": "http://json-schema.org/?#",
212212
"valid": true
213213
},
214+
{
215+
"description": "One letter scheme",
216+
"data": "a://foo",
217+
"valid": true
218+
},
219+
{
220+
"description": "very long valid URI",
221+
"data": "http://foo.bar/?baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a#&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a",
222+
"valid": true
223+
},
224+
{
225+
"description": "authority with user",
226+
"data": "https://[email protected]",
227+
"valid": true
228+
},
229+
{
230+
"description": "authority with port",
231+
"data": "https://foo.org:12345",
232+
"valid": true
233+
},
234+
{
235+
"description": "authority with user and port",
236+
"data": "https://[email protected]:12345",
237+
"valid": true
238+
},
214239
{
215240
"description": "a scheme is mandatory in URI",
216241
"data": "//foo.bar/?baz=qux#quux",

0 commit comments

Comments
 (0)