Skip to content

Commit 00923c2

Browse files
vmaurinVincent Maurin
and
Vincent Maurin
authored
Simplify the uri format validation regexp (networknt#414)
The previous regexp relied heavily on `|` alternation, which led to StackOverflowError on long URIs (the Java Pattern class uses recursion extensively when matching). I have reworked the regexp directly from the RFC, taking some shortcuts so that it mostly validates which characters are allowed or not allowed in the different parts of the URI. refs networknt#413 Co-authored-by: Vincent Maurin <[email protected]>
1 parent f4809e5 commit 00923c2

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

src/main/java/com/networknt/schema/JsonMetaSchema.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,42 @@ static PatternFormat pattern(String name, String regex) {
4747
"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"));
4848
COMMON_BUILTIN_FORMATS.add(pattern("ipv6",
4949
"^\\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:)))(%.+)?\\s*$"));
50-
COMMON_BUILTIN_FORMATS.add(pattern("uri", "^([A-Za-z][A-Za-z0-9+.-]+):(\\/\\/([^@]+@)?([A-Za-z0-9.\\-_~]+)(:\\d+)?)?((?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@])+(?:\\/(?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@])*)*|(?:\\/(?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@])+)*)?(\\?(?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@]|[/?])*)?(\\#(?:[A-Za-z0-9-._~]|%[A-Fa-f0-9]|[!$&'()*+,;=:@]|[/?])*)?$"));
50+
51+
// From RFC 3986
52+
// ALPHA [A-Za-z]
53+
// DIGIT [0-9]
54+
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
55+
// => [A-Za-z][A-Za-z0-9+.-]*
56+
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
57+
// => [A-Za-z0-9._~\-]
58+
// gen-delims [:/?#\[\]@]
59+
// sub-delims [!$&'()*+,;=]
60+
// reserved = gen-delims / sub-delims
61+
// => [:/?#\[\]@!$&'()*+,;=]
62+
// pct-encoded = "%" HEXDIG HEXDIG
63+
// => [A-Za-z0-9%] (approximation)
64+
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
65+
// => [A-Za-z0-9._~\-%!$&'()*+,;=:@]
66+
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
67+
// => [A-Za-z0-9._~\-%!$&'()*+,;=:]*
68+
// host = IP-literal / IPv4address / reg-name
69+
// => [A-Za-z0-9._~\-!$&'()*+,;=%:\[\]]* (approximation)
70+
// port = *DIGIT
71+
// => [0-9]*
72+
// authority = [ userinfo "@" ] host [ ":" port ]
73+
// => ([A-Za-z0-9._~\-%!$&'()*+,;=:]*@)?[A-Za-z0-9._~\-!$&'()*+,;=%:\[\]]*(:[0-9]*)?
74+
// hier-part = "//" authority path-abempty
75+
// / path-absolute
76+
// / path-rootless
77+
// / path-empty
78+
// => (\/\/([A-Za-z0-9._~\-%!$&'()*+,;=:]*@)?[A-Za-z0-9._~\-!$&'()*+,;=%:\[\]]*(:[0-9]*)?)?[A-Za-z0-9._~\-%!$&'()*+,;=:@\/]* (approximation)
79+
// query = *( pchar / "/" / "?" )
80+
// => [A-Za-z0-9._~\-%!$&'()*+,;=:@\/?]*
81+
// fragment = *( pchar / "/" / "?" )
82+
// => [A-Za-z0-9._~\-%!$&'()*+,;=:@\/?]*
83+
// uri = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
84+
COMMON_BUILTIN_FORMATS.add(pattern("uri",
85+
"^[A-Za-z][A-Za-z0-9+.-]*:(\\/\\/([A-Za-z0-9._~\\-%!$&'()*+,;=:]*@)?[A-Za-z0-9._~\\-!$&'()*+,;=%:\\[\\]]*(:[0-9]*)?)?[A-Za-z0-9._~\\-%!$&'()*+,;=:@\\/]*([?][A-Za-z0-9._~\\-%!$&'()*+,;=:@\\/?]*)?([#][A-Za-z0-9._~\\-%!$&'()*+,;=:@\\/?]*)?"));
5186
COMMON_BUILTIN_FORMATS.add(pattern("color",
5287
"(#?([0-9A-Fa-f]{3,6})\\b)|(aqua)|(black)|(blue)|(fuchsia)|(gray)|(green)|(lime)|(maroon)|(navy)|(olive)|(orange)|(purple)|(red)|(silver)|(teal)|(white)|(yellow)|(rgb\\(\\s*\\b([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\b\\s*,\\s*\\b([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\b\\s*,\\s*\\b([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\b\\s*\\))|(rgb\\(\\s*(\\d?\\d%|100%)+\\s*,\\s*(\\d?\\d%|100%)+\\s*,\\s*(\\d?\\d%|100%)+\\s*\\))"));
5388
COMMON_BUILTIN_FORMATS.add(pattern("hostname",

src/test/resources/draft4/optional/format.json

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,31 @@
211211
"data": "http://json-schema.org/?#",
212212
"valid": true
213213
},
214+
{
215+
"description": "One letter scheme",
216+
"data": "a://foo",
217+
"valid": true
218+
},
219+
{
220+
"description": "very long valid URI",
221+
"data": "http://foo.bar/?baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a#&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a&baz=a%20a",
222+
"valid": true
223+
},
224+
{
225+
"description": "authority with user",
226+
"data": "https://[email protected]",
227+
"valid": true
228+
},
229+
{
230+
"description": "authority with port",
231+
"data": "https://foo.org:12345",
232+
"valid": true
233+
},
234+
{
235+
"description": "authority with user and port",
236+
"data": "https://[email protected]:12345",
237+
"valid": true
238+
},
214239
{
215240
"description": "a scheme is mandatory in URI",
216241
"data": "//foo.bar/?baz=qux#quux",

0 commit comments

Comments
 (0)