Skip to content

Commit c5f6ca9

Browse files
Merge pull request #124 from davishmcclurg/non-breaking
Various `format` validation improvements
2 parents 0861dd2 + 5350f40 commit c5f6ca9

File tree

8 files changed

+159
-44
lines changed

8 files changed

+159
-44
lines changed

Gemfile.lock

+8
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ PATH
55
ecma-re-validator (~> 0.3)
66
hana (~> 1.3)
77
regexp_parser (~> 2.0)
8+
simpleidn (~> 0.2)
89
uri_template (~> 0.7)
910

1011
GEM
@@ -16,9 +17,16 @@ GEM
1617
minitest (5.15.0)
1718
rake (13.0.6)
1819
regexp_parser (2.6.1)
20+
simpleidn (0.2.1)
21+
unf (~> 0.1.4)
22+
unf (0.1.4)
23+
unf_ext
24+
unf (0.1.4-java)
25+
unf_ext (0.0.8.2)
1926
uri_template (0.7.0)
2027

2128
PLATFORMS
29+
java
2230
ruby
2331

2432
DEPENDENCIES

bin/hostname_character_classes

+42
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/env ruby
2+
3+
require 'open-uri'
4+
require 'csv'
5+
6+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
7+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
8+
9+
# Options shared by both CSV parses below: fields are separated by `;`,
# blank lines are skipped, and lines beginning with `#` are skipped.
csv_options = { :col_sep => ';', :skip_blanks => true, :skip_lines => /\A#/ }
10+
11+
# Remote files on unicode.org that this script reads.
unicode_data = URI('https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt')
12+
derived_joining_type = URI('https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedJoiningType.txt')
13+
14+
# https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
15+
virama_canonical_combining_class = '9'
16+
17+
# Keep the rows whose fourth field equals the virama combining class, then
# collect the first field (the code) of each kept row.
virama_codes = CSV.new(unicode_data.read, **csv_options).select do |code, _name, _category, canonical_combining_class|
18+
canonical_combining_class == virama_canonical_combining_class
19+
end.map(&:first)
20+
21+
# https://www.unicode.org/reports/tr44/#Default_Values
22+
# https://www.unicode.org/reports/tr44/#Derived_Extracted
23+
# Group rows by their second field with any trailing `# ...` text and
# surrounding whitespace removed; each group is reduced to its stripped codes.
codes_by_joining_type = CSV.new(derived_joining_type.read, **csv_options).group_by do |_code, joining_type|
24+
joining_type.gsub(/#.+/, '').strip
25+
end.transform_values do |rows|
26+
rows.map do |code, _joining_type|
27+
code.strip
28+
end
29+
end
30+
31+
# Builds the source text of a regular-expression character class from a list
# of code entries.
#
# Every run of hex digits in an entry is wrapped as `\u{...}`, and a `..`
# range separator is rewritten to `-`, so an entry such as "062A..062E"
# becomes `\u{062A}-\u{062E}`. The converted entries are concatenated in order
# and enclosed in square brackets.
#
# @param codes [Array<String>] single codes ("094D") or ranges ("062A..062E")
# @return [String] character class text, e.g. `[\u{094D}\u{062A}-\u{062E}]`
def codes_to_character_class(codes)
  members = codes.each_with_object(+'') do |entry, buffer|
    escaped = entry.gsub(/(\h+)/, '\u{\1}')
    buffer << escaped.gsub('..', '-')
  end
  "[#{members}]"
end
37+
38+
# Print each generated character class as a Ruby constant assignment.
puts "VIRAMA_CHARACTER_CLASS = '#{codes_to_character_class(virama_codes)}'"
39+
40+
# Only the joining types L, D, T and R are emitted.
codes_by_joining_type.slice('L', 'D', 'T', 'R').each do |joining_type, codes|
41+
puts "JOINING_TYPE_#{joining_type}_CHARACTER_CLASS = '#{codes_to_character_class(codes)}'"
42+
end

json_schemer.gemspec

+2-9
Original file line number | Diff line number | Diff line change
@@ -26,16 +26,9 @@ Gem::Specification.new do |spec|
2626
spec.add_development_dependency "rake", "~> 13.0"
2727
spec.add_development_dependency "minitest", "~> 5.0"
2828

29-
# spec.add_development_dependency "benchmark-ips", "~> 2.7.2"
30-
# spec.add_development_dependency "jschema", "~> 0.2.1"
31-
# spec.add_development_dependency "json-schema", "~> 2.8.0"
32-
# spec.add_development_dependency "json_schema", "~> 0.17.0"
33-
# spec.add_development_dependency "json_validation", "~> 0.1.0"
34-
# spec.add_development_dependency "jsonschema", "~> 2.0.2"
35-
# spec.add_development_dependency "rj_schema", "~> 0.2.0"
36-
3729
spec.add_runtime_dependency "ecma-re-validator", "~> 0.3"
3830
spec.add_runtime_dependency "hana", "~> 1.3"
39-
spec.add_runtime_dependency "uri_template", "~> 0.7"
4031
spec.add_runtime_dependency "regexp_parser", "~> 2.0"
32+
spec.add_runtime_dependency "simpleidn", "~> 0.2"
33+
spec.add_runtime_dependency "uri_template", "~> 0.7"
4134
end

lib/json_schemer.rb

+2
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,11 @@
1212
require 'ecma-re-validator'
1313
require 'hana'
1414
require 'regexp_parser'
15+
require 'simpleidn'
1516
require 'uri_template'
1617

1718
require 'json_schemer/version'
19+
require 'json_schemer/format/hostname'
1820
require 'json_schemer/format'
1921
require 'json_schemer/errors'
2022
require 'json_schemer/cached_resolver'

lib/json_schemer/format.rb

+18-15
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,18 @@
11
# frozen_string_literal: true
22
module JSONSchemer
33
module Format
4+
include Hostname
5+
46
# this is no good
57
EMAIL_REGEX = /\A[^@\s]+@([\p{L}\d-]+\.)+[\p{L}\d\-]{2,}\z/i.freeze
6-
LABEL_REGEX_STRING = '[\p{L}\p{N}]([\p{L}\p{N}\-]*[\p{L}\p{N}])?'
7-
HOSTNAME_REGEX = /\A(#{LABEL_REGEX_STRING}\.)*#{LABEL_REGEX_STRING}\z/i.freeze
88
JSON_POINTER_REGEX_STRING = '(\/([^~\/]|~[01])*)*'
99
JSON_POINTER_REGEX = /\A#{JSON_POINTER_REGEX_STRING}\z/.freeze
1010
RELATIVE_JSON_POINTER_REGEX = /\A(0|[1-9]\d*)(#|#{JSON_POINTER_REGEX_STRING})?\z/.freeze
1111
DATE_TIME_OFFSET_REGEX = /(Z|[\+\-]([01][0-9]|2[0-3]):[0-5][0-9])\z/i.freeze
12-
INVALID_QUERY_REGEX = /[[:space:]]/.freeze
12+
HOUR_24_REGEX = /T24/.freeze
13+
LEAP_SECOND_REGEX = /T\d{2}:\d{2}:6/.freeze
14+
IP_REGEX = /\A[\h:.]+\z/.freeze
15+
INVALID_QUERY_REGEX = /\s/.freeze
1316

1417
def valid_spec_format?(data, format)
1518
case format
@@ -28,9 +31,9 @@ def valid_spec_format?(data, format)
2831
when 'idn-hostname'
2932
valid_hostname?(data)
3033
when 'ipv4'
31-
valid_ip?(data, :v4)
34+
valid_ip?(data, Socket::AF_INET)
3235
when 'ipv6'
33-
valid_ip?(data, :v6)
36+
valid_ip?(data, Socket::AF_INET6)
3437
when 'uri'
3538
valid_uri?(data)
3639
when 'uri-reference'
@@ -58,24 +61,24 @@ def valid_json?(data)
5861
end
5962

6063
def valid_date_time?(data)
61-
DateTime.rfc3339(data)
64+
return false if HOUR_24_REGEX.match?(data)
65+
datetime = DateTime.rfc3339(data)
66+
return false if LEAP_SECOND_REGEX.match?(data) && datetime.to_time.utc.strftime('%H:%M') != '23:59'
6267
DATE_TIME_OFFSET_REGEX.match?(data)
6368
rescue ArgumentError
6469
false
6570
end
6671

6772
def valid_email?(data)
68-
EMAIL_REGEX.match?(data)
69-
end
70-
71-
def valid_hostname?(data)
72-
HOSTNAME_REGEX.match?(data) && data.split('.').all? { |label| label.size <= 63 }
73+
return false unless EMAIL_REGEX.match?(data)
74+
local, _domain = data.partition('@')
75+
!local.start_with?('.') && !local.end_with?('.') && !local.include?('..')
7376
end
7477

75-
def valid_ip?(data, type)
76-
ip_address = IPAddr.new(data)
77-
type == :v4 ? ip_address.ipv4? : ip_address.ipv6?
78-
rescue IPAddr::InvalidAddressError
78+
def valid_ip?(data, family)
79+
IPAddr.new(data, family)
80+
IP_REGEX.match?(data)
81+
rescue IPAddr::Error
7982
false
8083
end
8184

lib/json_schemer/format/hostname.rb

+58
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,58 @@
1+
# frozen_string_literal: true
2+
module JSONSchemer
3+
module Format
4+
module Hostname
5+
# https://datatracker.ietf.org/doc/html/rfc5892#section-2.1
6+
MARKS = '\p{Mn}\p{Mc}'
7+
LETTER_DIGITS = "\\p{Ll}\\p{Lu}\\p{Lo}\\p{Nd}\\p{Lm}#{MARKS}"
8+
# https://datatracker.ietf.org/doc/html/rfc5892#section-2.6
9+
EXCEPTIONS_PVALID = '\u{06FD}\u{06FE}\u{0F0B}\u{3007}' # \u{00DF}\u{03C2} covered by \p{Ll}
10+
EXCEPTIONS_DISALLOWED = '\u{0640}\u{07FA}\u{302E}\u{302F}\u{3031}\u{3032}\u{3033}\u{3034}\u{3035}\u{303B}'
11+
LABEL_CHARACTER_CLASS = "[#{LETTER_DIGITS}#{EXCEPTIONS_PVALID}&&[^#{EXCEPTIONS_DISALLOWED}]]"
12+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
13+
LEADING_CHARACTER_CLASS = "[#{LABEL_CHARACTER_CLASS}&&[^#{MARKS}]]"
14+
LABEL_REGEX_STRING = "#{LEADING_CHARACTER_CLASS}([#{LABEL_CHARACTER_CLASS}\-]*#{LABEL_CHARACTER_CLASS})?"
15+
HOSTNAME_REGEX = /\A(#{LABEL_REGEX_STRING}\.)*#{LABEL_REGEX_STRING}\z/i.freeze
16+
# bin/hostname_character_classes
17+
VIRAMA_CHARACTER_CLASS = '[\u{094D}\u{09CD}\u{0A4D}\u{0ACD}\u{0B4D}\u{0BCD}\u{0C4D}\u{0CCD}\u{0D3B}\u{0D3C}\u{0D4D}\u{0DCA}\u{0E3A}\u{0EBA}\u{0F84}\u{1039}\u{103A}\u{1714}\u{1715}\u{1734}\u{17D2}\u{1A60}\u{1B44}\u{1BAA}\u{1BAB}\u{1BF2}\u{1BF3}\u{2D7F}\u{A806}\u{A82C}\u{A8C4}\u{A953}\u{A9C0}\u{AAF6}\u{ABED}\u{10A3F}\u{11046}\u{11070}\u{1107F}\u{110B9}\u{11133}\u{11134}\u{111C0}\u{11235}\u{112EA}\u{1134D}\u{11442}\u{114C2}\u{115BF}\u{1163F}\u{116B6}\u{1172B}\u{11839}\u{1193D}\u{1193E}\u{119E0}\u{11A34}\u{11A47}\u{11A99}\u{11C3F}\u{11D44}\u{11D45}\u{11D97}\u{11F41}\u{11F42}]'
18+
JOINING_TYPE_L_CHARACTER_CLASS = '[\u{A872}\u{10ACD}\u{10AD7}\u{10D00}\u{10FCB}]'
19+
JOINING_TYPE_D_CHARACTER_CLASS = '[\u{0620}\u{0626}\u{0628}\u{062A}-\u{062E}\u{0633}-\u{063F}\u{0641}-\u{0647}\u{0649}-\u{064A}\u{066E}-\u{066F}\u{0678}-\u{0687}\u{069A}-\u{06BF}\u{06C1}-\u{06C2}\u{06CC}\u{06CE}\u{06D0}-\u{06D1}\u{06FA}-\u{06FC}\u{06FF}\u{0712}-\u{0714}\u{071A}-\u{071D}\u{071F}-\u{0727}\u{0729}\u{072B}\u{072D}-\u{072E}\u{074E}-\u{0758}\u{075C}-\u{076A}\u{076D}-\u{0770}\u{0772}\u{0775}-\u{0777}\u{077A}-\u{077F}\u{07CA}-\u{07EA}\u{0841}-\u{0845}\u{0848}\u{084A}-\u{0853}\u{0855}\u{0860}\u{0862}-\u{0865}\u{0868}\u{0886}\u{0889}-\u{088D}\u{08A0}-\u{08A9}\u{08AF}-\u{08B0}\u{08B3}-\u{08B8}\u{08BA}-\u{08C8}\u{1807}\u{1820}-\u{1842}\u{1843}\u{1844}-\u{1878}\u{1887}-\u{18A8}\u{18AA}\u{A840}-\u{A871}\u{10AC0}-\u{10AC4}\u{10AD3}-\u{10AD6}\u{10AD8}-\u{10ADC}\u{10ADE}-\u{10AE0}\u{10AEB}-\u{10AEE}\u{10B80}\u{10B82}\u{10B86}-\u{10B88}\u{10B8A}-\u{10B8B}\u{10B8D}\u{10B90}\u{10BAD}-\u{10BAE}\u{10D01}-\u{10D21}\u{10D23}\u{10F30}-\u{10F32}\u{10F34}-\u{10F44}\u{10F51}-\u{10F53}\u{10F70}-\u{10F73}\u{10F76}-\u{10F81}\u{10FB0}\u{10FB2}-\u{10FB3}\u{10FB8}\u{10FBB}-\u{10FBC}\u{10FBE}-\u{10FBF}\u{10FC1}\u{10FC4}\u{10FCA}\u{1E900}-\u{1E943}]'
20+
JOINING_TYPE_T_CHARACTER_CLASS = '[\u{00AD}\u{0300}-\u{036F}\u{0483}-\u{0487}\u{0488}-\u{0489}\u{0591}-\u{05BD}\u{05BF}\u{05C1}-\u{05C2}\u{05C4}-\u{05C5}\u{05C7}\u{0610}-\u{061A}\u{061C}\u{064B}-\u{065F}\u{0670}\u{06D6}-\u{06DC}\u{06DF}-\u{06E4}\u{06E7}-\u{06E8}\u{06EA}-\u{06ED}\u{070F}\u{0711}\u{0730}-\u{074A}\u{07A6}-\u{07B0}\u{07EB}-\u{07F3}\u{07FD}\u{0816}-\u{0819}\u{081B}-\u{0823}\u{0825}-\u{0827}\u{0829}-\u{082D}\u{0859}-\u{085B}\u{0898}-\u{089F}\u{08CA}-\u{08E1}\u{08E3}-\u{0902}\u{093A}\u{093C}\u{0941}-\u{0948}\u{094D}\u{0951}-\u{0957}\u{0962}-\u{0963}\u{0981}\u{09BC}\u{09C1}-\u{09C4}\u{09CD}\u{09E2}-\u{09E3}\u{09FE}\u{0A01}-\u{0A02}\u{0A3C}\u{0A41}-\u{0A42}\u{0A47}-\u{0A48}\u{0A4B}-\u{0A4D}\u{0A51}\u{0A70}-\u{0A71}\u{0A75}\u{0A81}-\u{0A82}\u{0ABC}\u{0AC1}-\u{0AC5}\u{0AC7}-\u{0AC8}\u{0ACD}\u{0AE2}-\u{0AE3}\u{0AFA}-\u{0AFF}\u{0B01}\u{0B3C}\u{0B3F}\u{0B41}-\u{0B44}\u{0B4D}\u{0B55}-\u{0B56}\u{0B62}-\u{0B63}\u{0B82}\u{0BC0}\u{0BCD}\u{0C00}\u{0C04}\u{0C3C}\u{0C3E}-\u{0C40}\u{0C46}-\u{0C48}\u{0C4A}-\u{0C4D}\u{0C55}-\u{0C56}\u{0C62}-\u{0C63}\u{0C81}\u{0CBC}\u{0CBF}\u{0CC6}\u{0CCC}-\u{0CCD}\u{0CE2}-\u{0CE3}\u{0D00}-\u{0D01}\u{0D3B}-\u{0D3C}\u{0D41}-\u{0D44}\u{0D4D}\u{0D62}-\u{0D63}\u{0D81}\u{0DCA}\u{0DD2}-\u{0DD4}\u{0DD6}\u{0E31}\u{0E34}-\u{0E3A}\u{0E47}-\u{0E4E}\u{0EB1}\u{0EB4}-\u{0EBC}\u{0EC8}-\u{0ECE}\u{0F18}-\u{0F19}\u{0F35}\u{0F37}\u{0F39}\u{0F71}-\u{0F7E}\u{0F80}-\u{0F84}\u{0F86}-\u{0F87}\u{0F8D}-\u{0F97}\u{0F99}-\u{0FBC}\u{0FC6}\u{102D}-\u{1030}\u{1032}-\u{1037}\u{1039}-\u{103A}\u{103D}-\u{103E}\u{1058}-\u{1059}\u{105E}-\u{1060}\u{1071}-\u{1074}\u{1082}\u{1085}-\u{1086}\u{108D}\u{109D}\u{135D}-\u{135F}\u{1712}-\u{1714}\u{1732}-\u{1733}\u{1752}-\u{1753}\u{1772}-\u{1773}\u{17B4}-\u{17B5}\u{17B7}-\u{17BD}\u{17C6}\u{17C9}-\u{17D3}\u{17DD}\u{180B}-\u{180D}\u{180F}\u{1885}-\u{1886}\u{18A9}\u{1920}-\u{1922}\u{1927}-\u{1928}\u{1932}\u{1939}-\u{193B}\u{1A17}-\u{1A18}\u{1A1B}\u{1A56}\u{1A58}-\u{1A5E}\u{1A60}\u{1A62}\u{1A65}-\u{1A6C}\u{1A73}-\u{1A7C}\u{1A7F}\u{1AB0}-\u{1
ABD}\u{1ABE}\u{1ABF}-\u{1ACE}\u{1B00}-\u{1B03}\u{1B34}\u{1B36}-\u{1B3A}\u{1B3C}\u{1B42}\u{1B6B}-\u{1B73}\u{1B80}-\u{1B81}\u{1BA2}-\u{1BA5}\u{1BA8}-\u{1BA9}\u{1BAB}-\u{1BAD}\u{1BE6}\u{1BE8}-\u{1BE9}\u{1BED}\u{1BEF}-\u{1BF1}\u{1C2C}-\u{1C33}\u{1C36}-\u{1C37}\u{1CD0}-\u{1CD2}\u{1CD4}-\u{1CE0}\u{1CE2}-\u{1CE8}\u{1CED}\u{1CF4}\u{1CF8}-\u{1CF9}\u{1DC0}-\u{1DFF}\u{200B}\u{200E}-\u{200F}\u{202A}-\u{202E}\u{2060}-\u{2064}\u{206A}-\u{206F}\u{20D0}-\u{20DC}\u{20DD}-\u{20E0}\u{20E1}\u{20E2}-\u{20E4}\u{20E5}-\u{20F0}\u{2CEF}-\u{2CF1}\u{2D7F}\u{2DE0}-\u{2DFF}\u{302A}-\u{302D}\u{3099}-\u{309A}\u{A66F}\u{A670}-\u{A672}\u{A674}-\u{A67D}\u{A69E}-\u{A69F}\u{A6F0}-\u{A6F1}\u{A802}\u{A806}\u{A80B}\u{A825}-\u{A826}\u{A82C}\u{A8C4}-\u{A8C5}\u{A8E0}-\u{A8F1}\u{A8FF}\u{A926}-\u{A92D}\u{A947}-\u{A951}\u{A980}-\u{A982}\u{A9B3}\u{A9B6}-\u{A9B9}\u{A9BC}-\u{A9BD}\u{A9E5}\u{AA29}-\u{AA2E}\u{AA31}-\u{AA32}\u{AA35}-\u{AA36}\u{AA43}\u{AA4C}\u{AA7C}\u{AAB0}\u{AAB2}-\u{AAB4}\u{AAB7}-\u{AAB8}\u{AABE}-\u{AABF}\u{AAC1}\u{AAEC}-\u{AAED}\u{AAF6}\u{ABE5}\u{ABE8}\u{ABED}\u{FB1E}\u{FE00}-\u{FE0F}\u{FE20}-\u{FE2F}\u{FEFF}\u{FFF9}-\u{FFFB}\u{101FD}\u{102E0}\u{10376}-\u{1037A}\u{10A01}-\u{10A03}\u{10A05}-\u{10A06}\u{10A0C}-\u{10A0F}\u{10A38}-\u{10A3A}\u{10A3F}\u{10AE5}-\u{10AE6}\u{10D24}-\u{10D27}\u{10EAB}-\u{10EAC}\u{10EFD}-\u{10EFF}\u{10F46}-\u{10F50}\u{10F82}-\u{10F85}\u{11001}\u{11038}-\u{11046}\u{11070}\u{11073}-\u{11074}\u{1107F}-\u{11081}\u{110B3}-\u{110B6}\u{110B9}-\u{110BA}\u{110C2}\u{11100}-\u{11102}\u{11127}-\u{1112B}\u{1112D}-\u{11134}\u{11173}\u{11180}-\u{11181}\u{111B6}-\u{111BE}\u{111C9}-\u{111CC}\u{111CF}\u{1122F}-\u{11231}\u{11234}\u{11236}-\u{11237}\u{1123E}\u{11241}\u{112DF}\u{112E3}-\u{112EA}\u{11300}-\u{11301}\u{1133B}-\u{1133C}\u{11340}\u{11366}-\u{1136C}\u{11370}-\u{11374}\u{11438}-\u{1143F}\u{11442}-\u{11444}\u{11446}\u{1145E}\u{114B3}-\u{114B8}\u{114BA}\u{114BF}-\u{114C0}\u{114C2}-\u{114C3}\u{115B2}-\u{115B5}\u{115BC}-\u{115BD}\u{115BF}-\u{115C0}\u{115DC}-\u{115DD}\u{11633}-\u{1163A}\u{1
163D}\u{1163F}-\u{11640}\u{116AB}\u{116AD}\u{116B0}-\u{116B5}\u{116B7}\u{1171D}-\u{1171F}\u{11722}-\u{11725}\u{11727}-\u{1172B}\u{1182F}-\u{11837}\u{11839}-\u{1183A}\u{1193B}-\u{1193C}\u{1193E}\u{11943}\u{119D4}-\u{119D7}\u{119DA}-\u{119DB}\u{119E0}\u{11A01}-\u{11A0A}\u{11A33}-\u{11A38}\u{11A3B}-\u{11A3E}\u{11A47}\u{11A51}-\u{11A56}\u{11A59}-\u{11A5B}\u{11A8A}-\u{11A96}\u{11A98}-\u{11A99}\u{11C30}-\u{11C36}\u{11C38}-\u{11C3D}\u{11C3F}\u{11C92}-\u{11CA7}\u{11CAA}-\u{11CB0}\u{11CB2}-\u{11CB3}\u{11CB5}-\u{11CB6}\u{11D31}-\u{11D36}\u{11D3A}\u{11D3C}-\u{11D3D}\u{11D3F}-\u{11D45}\u{11D47}\u{11D90}-\u{11D91}\u{11D95}\u{11D97}\u{11EF3}-\u{11EF4}\u{11F00}-\u{11F01}\u{11F36}-\u{11F3A}\u{11F40}\u{11F42}\u{13430}-\u{1343F}\u{13440}\u{13447}-\u{13455}\u{16AF0}-\u{16AF4}\u{16B30}-\u{16B36}\u{16F4F}\u{16F8F}-\u{16F92}\u{16FE4}\u{1BC9D}-\u{1BC9E}\u{1BCA0}-\u{1BCA3}\u{1CF00}-\u{1CF2D}\u{1CF30}-\u{1CF46}\u{1D167}-\u{1D169}\u{1D173}-\u{1D17A}\u{1D17B}-\u{1D182}\u{1D185}-\u{1D18B}\u{1D1AA}-\u{1D1AD}\u{1D242}-\u{1D244}\u{1DA00}-\u{1DA36}\u{1DA3B}-\u{1DA6C}\u{1DA75}\u{1DA84}\u{1DA9B}-\u{1DA9F}\u{1DAA1}-\u{1DAAF}\u{1E000}-\u{1E006}\u{1E008}-\u{1E018}\u{1E01B}-\u{1E021}\u{1E023}-\u{1E024}\u{1E026}-\u{1E02A}\u{1E08F}\u{1E130}-\u{1E136}\u{1E2AE}\u{1E2EC}-\u{1E2EF}\u{1E4EC}-\u{1E4EF}\u{1E8D0}-\u{1E8D6}\u{1E944}-\u{1E94A}\u{1E94B}\u{E0001}\u{E0020}-\u{E007F}\u{E0100}-\u{E01EF}]'
21+
JOINING_TYPE_R_CHARACTER_CLASS = '[\u{0622}-\u{0625}\u{0627}\u{0629}\u{062F}-\u{0632}\u{0648}\u{0671}-\u{0673}\u{0675}-\u{0677}\u{0688}-\u{0699}\u{06C0}\u{06C3}-\u{06CB}\u{06CD}\u{06CF}\u{06D2}-\u{06D3}\u{06D5}\u{06EE}-\u{06EF}\u{0710}\u{0715}-\u{0719}\u{071E}\u{0728}\u{072A}\u{072C}\u{072F}\u{074D}\u{0759}-\u{075B}\u{076B}-\u{076C}\u{0771}\u{0773}-\u{0774}\u{0778}-\u{0779}\u{0840}\u{0846}-\u{0847}\u{0849}\u{0854}\u{0856}-\u{0858}\u{0867}\u{0869}-\u{086A}\u{0870}-\u{0882}\u{088E}\u{08AA}-\u{08AC}\u{08AE}\u{08B1}-\u{08B2}\u{08B9}\u{10AC5}\u{10AC7}\u{10AC9}-\u{10ACA}\u{10ACE}-\u{10AD2}\u{10ADD}\u{10AE1}\u{10AE4}\u{10AEF}\u{10B81}\u{10B83}-\u{10B85}\u{10B89}\u{10B8C}\u{10B8E}-\u{10B8F}\u{10B91}\u{10BA9}-\u{10BAC}\u{10D22}\u{10F33}\u{10F54}\u{10F74}-\u{10F75}\u{10FB4}-\u{10FB6}\u{10FB9}-\u{10FBA}\u{10FBD}\u{10FC2}-\u{10FC3}\u{10FC9}]'
22+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
23+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
24+
ZERO_WIDTH_VIRAMA = "#{VIRAMA_CHARACTER_CLASS}[\\u{200C}\\u{200D}]"
25+
ZERO_WIDTH_NON_JOINER_JOINING_TYPE = "[#{JOINING_TYPE_L_CHARACTER_CLASS}#{JOINING_TYPE_D_CHARACTER_CLASS}]#{JOINING_TYPE_T_CHARACTER_CLASS}*\\u{200C}#{JOINING_TYPE_T_CHARACTER_CLASS}*[#{JOINING_TYPE_R_CHARACTER_CLASS}#{JOINING_TYPE_D_CHARACTER_CLASS}]"
26+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
27+
MIDDLE_DOT = '\u{006C}\u{00B7}\u{006C}'
28+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
29+
GREEK_LOWER_NUMERAL_SIGN = '\u{0375}\p{Greek}'
30+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
31+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
32+
HEBREW_PUNCTUATION = '\p{Hebrew}[\u{05F3}\u{05F4}]'
33+
CONTEXT_REGEX = /(#{ZERO_WIDTH_VIRAMA}|#{ZERO_WIDTH_NON_JOINER_JOINING_TYPE}|#{MIDDLE_DOT}|#{GREEK_LOWER_NUMERAL_SIGN}|#{HEBREW_PUNCTUATION})/.freeze
34+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
35+
KATAKANA_MIDDLE_DOT_REGEX = /\u{30FB}/.freeze
36+
KATAKANA_MIDDLE_DOT_CONTEXT_REGEX = /[\p{Hiragana}\p{Katakana}\p{Han}]/.freeze
37+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
38+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
39+
ARABIC_INDIC_DIGITS_REGEX = /[\u{0660}-\u{0669}]/.freeze
40+
ARABIC_EXTENDED_DIGITS_REGEX = /[\u{06F0}-\u{06F9}]/.freeze
41+
42+
# Validates +data+ as a hostname, first label by label and then as a whole.
#
# Each dot-separated label is rejected outright when it is longer than 63
# characters, when its Unicode form (via SimpleIDN.to_unicode) has "--" in the
# third and fourth positions, or when it contains both Arabic-Indic and
# Extended Arabic-Indic digits. Matches of the contextual-rule patterns are
# then replaced with the placeholder "ok" so the final HOSTNAME_REGEX check
# only sees ordinary label characters; the Katakana middle dot is replaced
# only when the label also contains a Hiragana, Katakana or Han character.
#
# @param data [String] candidate hostname
# @return [Boolean] true when the rewritten labels, re-joined with ".",
#   match HOSTNAME_REGEX; false otherwise, including when SimpleIDN raises
#   SimpleIDN::ConversionError
def valid_hostname?(data)
  unicode_labels = data.split('.').map do |ascii_label|
    return false if ascii_label.size > 63

    unicode_label = SimpleIDN.to_unicode(ascii_label)
    # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
    return false if unicode_label.slice(2, 2) == '--'

    mixes_digit_sets = ARABIC_INDIC_DIGITS_REGEX.match?(unicode_label) &&
                       ARABIC_EXTENDED_DIGITS_REGEX.match?(unicode_label)
    return false if mixes_digit_sets

    unicode_label.gsub!(CONTEXT_REGEX, 'ok')
    if KATAKANA_MIDDLE_DOT_CONTEXT_REGEX.match?(unicode_label)
      unicode_label.gsub!(KATAKANA_MIDDLE_DOT_REGEX, 'ok')
    end
    unicode_label
  end

  HOSTNAME_REGEX.match?(unicode_labels.join('.'))
rescue SimpleIDN::ConversionError
  false
end
56+
end
57+
end
58+
end

lib/json_schemer/schema/base.rb

+1-1
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ def validate_instance(instance, &block)
200200

201201
if if_schema && valid_instance?(instance.merge(schema: if_schema, before_property_validation: false, after_property_validation: false))
202202
validate_instance(instance.merge(schema: then_schema, schema_pointer: "#{instance.schema_pointer}/then"), &block) unless then_schema.nil?
203-
elsif if_schema
203+
elsif schema.key?('if')
204204
validate_instance(instance.merge(schema: else_schema, schema_pointer: "#{instance.schema_pointer}/else"), &block) unless else_schema.nil?
205205
end
206206

test/benchmark.rb

+28-19
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,24 @@
11
# frozen_string_literal: true
2-
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2+
require 'bundler/inline'
33

4-
require 'benchmark/ips'
5-
require 'jschema'
6-
require 'json-schema'
7-
require 'json_schema'
8-
require 'json_schemer'
9-
require 'json_validation'
10-
require 'rj_schema'
11-
# require 'jsonschema'
4+
require 'digest' # json_validation
125

13-
# json_validation
14-
require 'digest'
6+
Fixnum = Integer # jsonschema
7+
8+
gemfile do
9+
source 'https://rubygems.org'
10+
11+
gem 'benchmark-ips'
12+
gem 'webrick'
13+
gem 'jschema'
14+
gem 'json-schema'
15+
gem 'json_schema'
16+
gem 'json_validation'
17+
# gem 'jsonschema'
18+
gem 'rj_schema'
19+
20+
gem 'json_schemer', :path => '../'
21+
end
1522

1623
benchmarks = {
1724
'simple' => {
@@ -80,12 +87,14 @@
8087

8188
# json-schema
8289

83-
x.report("json-schema, #{name}, valid") do
84-
raise unless JSON::Validator.validate(schema, valid)
90+
x.report("json-schema, uninitialized, #{name}, valid") do
91+
errors = JSON::Validator.fully_validate(schema, valid)
92+
raise if errors.any?
8593
end
8694

87-
x.report("json-schema, #{name}, invalid") do
88-
raise if JSON::Validator.validate(schema, invalid)
95+
x.report("json-schema, uninitialized, #{name}, invalid") do
96+
errors = JSON::Validator.fully_validate(schema, invalid)
97+
raise if errors.empty?
8998
end
9099

91100
# json_schema
@@ -154,22 +163,22 @@
154163

155164
x.report("rj_schema, uninitialized, #{name}, valid") do
156165
errors = RjSchema::Validator.new.validate(schema, valid)
157-
raise if errors.any?
166+
raise if errors.fetch(:machine_errors).any?
158167
end
159168

160169
x.report("rj_schema, uninitialized, #{name}, invalid") do
161170
errors = RjSchema::Validator.new.validate(schema, invalid)
162-
raise if errors.empty?
171+
raise if errors.fetch(:machine_errors).empty?
163172
end
164173

165174
x.report("rj_schema, initialized, #{name}, valid") do
166175
errors = initialized_rj_schema.validate(:"schema", valid)
167-
raise if errors.any?
176+
raise if errors.fetch(:machine_errors).any?
168177
end
169178

170179
x.report("rj_schema, initialized, #{name}, invalid") do
171180
errors = initialized_rj_schema.validate(:"schema", invalid)
172-
raise if errors.empty?
181+
raise if errors.fetch(:machine_errors).empty?
173182
end
174183

175184
# jsonschema

0 commit comments

Comments
 (0)