15
15
require 'fileutils'
16
16
require 'colorize'
17
17
require 'yaml'
18
+ require 'cgi'
18
19
19
20
PREFIXES = {
20
21
dc : "http://purl.org/dc/terms/" ,
49
50
# Remove highlighting and commented out sections
50
51
def justify ( str )
51
52
str = str .
52
- sub ( /^\s *<!--\s *$/ , '' ) .
53
- sub ( /^\s *-->\s *$/ , '' ) .
53
+ gsub ( /^\s *<!--\s *$/ , '' ) .
54
+ gsub ( /^\s *-->\s *$/ , '' ) .
54
55
gsub ( '****' , '' ) .
55
56
gsub ( /####([^#]*)####/ , '' )
56
57
@@ -222,7 +223,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
222
223
examples [ title ] = {
223
224
title : title ,
224
225
filename : fn ,
225
- content : content ,
226
+ content : content . to_s . gsub ( /^ \s *< ! \s *- \s *-/ , '<!--' ) . gsub ( /- \s *- >/ , '-->' ) ,
226
227
content_type : element . attr ( 'data-content-type' ) ,
227
228
number : example_number ,
228
229
ext : ext ,
@@ -302,6 +303,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
302
303
# Perform example syntactic validation based on extension
303
304
case ex [ :ext ]
304
305
when 'json' , 'jsonld' , 'jsonldf'
306
+ content = CGI . unescapeHTML ( content )
305
307
begin
306
308
::JSON . parse ( content )
307
309
rescue JSON ::ParserError => exception
@@ -325,22 +327,17 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
325
327
ex [ :base ] = html_base . to_s if html_base
326
328
327
329
script_content = doc . at_xpath ( xpath )
328
- if script_content
329
- # Remove (faked) XML comments and unescape sequences
330
- content = script_content
331
- . inner_html
332
- . sub ( /^\s *< !\s *-\s *-/ , '' )
333
- . sub ( /-\s *- >\s *$/ , '' )
334
- . gsub ( /</ , '<' )
335
- end
336
-
330
+
331
+ # Remove (faked) XML comments and unescape sequences
332
+ content = CGI . unescapeHTML ( script_content . inner_html ) if script_content
337
333
rescue Nokogiri ::XML ::SyntaxError => exception
338
334
errors << "Example #{ ex [ :number ] } at line #{ ex [ :line ] } parse error: #{ exception . message } "
339
335
$stdout. write "F" . colorize ( :red )
340
336
next
341
337
end
342
338
when 'table'
343
- # already in parsed form
339
+ doc = Nokogiri ::HTML . parse ( content ) { |c | c . strict }
340
+ content = CGI . unescapeHTML ( doc . inner_html )
344
341
when 'ttl' , 'trig'
345
342
begin
346
343
reader_errors = [ ]
@@ -443,10 +440,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
443
440
# Set argument to referenced content to be parsed
444
441
args [ 0 ] = if examples [ ex [ :result_for ] ] [ :ext ] == 'html' && method == :expand
445
442
# If we are expanding, and the reference is HTML, find the first script element.
446
- doc = Nokogiri ::HTML . parse (
447
- examples [ ex [ :result_for ] ] [ :content ]
448
- . sub ( /^\s *< !\s *-\s *-/ , '' )
449
- . sub ( /-\s *- >\s *$/ , '' ) )
443
+ doc = Nokogiri ::HTML . parse ( examples [ ex [ :result_for ] ] [ :content ] )
450
444
451
445
# Get base from document, if present
452
446
html_base = doc . at_xpath ( '/html/head/base/@href' )
@@ -458,15 +452,10 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
458
452
$stdout. write "F" . colorize ( :red )
459
453
next
460
454
end
461
- StringIO . new ( script_content
462
- . inner_html
463
- . gsub ( /</ , '<' ) )
455
+ StringIO . new ( CGI . unescapeHTML ( script_content . inner_html ) )
464
456
elsif examples [ ex [ :result_for ] ] [ :ext ] == 'html' && ex [ :target ]
465
457
# Only use the targeted script
466
- doc = Nokogiri ::HTML . parse (
467
- examples [ ex [ :result_for ] ] [ :content ]
468
- . sub ( /^\s *< !\s *-\s *-/ , '' )
469
- . sub ( /-\s *- >\s *$/ , '' ) )
458
+ doc = Nokogiri ::HTML . parse ( examples [ ex [ :result_for ] ] [ :content ] )
470
459
script_content = doc . at_xpath ( xpath )
471
460
unless script_content
472
461
errors << "Example #{ ex [ :number ] } at line #{ ex [ :line ] } references example #{ ex [ :result_for ] . inspect } with no JSON-LD script element"
@@ -565,7 +554,8 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
565
554
$stderr. puts "expected:\n " + expected . to_trig if verbose
566
555
when 'table'
567
556
expected = begin
568
- table_to_dataset ( content )
557
+ doc = Nokogiri ::HTML . parse ( content )
558
+ table_to_dataset ( doc )
569
559
rescue
570
560
errors << "Example #{ ex [ :number ] } at line #{ ex [ :line ] } raised error reading table: #{ $!} "
571
561
RDF ::Dataset . new
0 commit comments