Skip to content

Commit fcd3aed

Browse files
committed
Add SCRIPT_LOADERS and API.add_script_loader to allow alternate formats (e.g., YAML-LD) to define loaders for extracting script content.
1 parent 0177952 commit fcd3aed

File tree

1 file changed

+57
-29
lines changed

1 file changed

+57
-29
lines changed

lib/json/ld/api.rb

Lines changed: 57 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -758,6 +758,28 @@ class << self
758758
alias fromRDF fromRdf
759759
end
760760

761+
##
762+
# Hash of recognized script types and the loaders that decode them
763+
# into a hash or array of hashes.
764+
#
# Each key is a script content type; `load_html` matches it as a prefix of
# the script element's `type` attribute. Each value is a callable invoked
# as `loader.call(content, url: url, **options)`, where `content` is the
# script element's inner HTML.
#
# The hash is left unfrozen because `add_script_loader` adds entries to it.
#
# The built-in 'application/ld+json' loader validates the content when
# `options[:validate]` is set, drops the `:adapter` option unless its value
# is listed in MUTLI_JSON_ADAPTERS, and parses the content with MultiJson.
#
765+
# @return [Hash{String => Proc}]
766+
SCRIPT_LOADERS = {
767+
'application/ld+json' => ->(content, url:, **options) do
768+
validate_input(content, url: url) if options[:validate]
769+
# `options` is this lambda's own keyword hash, so filtering it in place
# with keep_if does not touch the caller's hash.
mj_opts = options.keep_if { |k, v| k != :adapter || MUTLI_JSON_ADAPTERS.include?(v) }
770+
MultiJson.load(content, **mj_opts)
771+
end
772+
}
773+
774+
##
775+
# Adds a loader for some specific content type, replacing any loader
# already stored in SCRIPT_LOADERS under the same type.
776+
#
# The loader is invoked as `loader.call(content, url: url, **options)` with
# the inner HTML of a matching script element, and is expected to return a
# hash or an array of hashes.
#
777+
# @param [String] type
#   Script content type (e.g., for an alternate format such as YAML-LD);
#   matched as a prefix of the script element's `type` attribute.
778+
# @param [Proc] loader
#   Callable accepting `(content, url:, **options)`.
# @return [Proc] the loader that was stored
779+
def self.add_script_loader(type, loader)
780+
SCRIPT_LOADERS[type] = loader
781+
end
782+
761783
##
762784
# Load one or more script tags from an HTML source.
763785
# Unescapes and uncomments input, returns the internal representation
@@ -812,47 +834,53 @@ def self.load_html(input, url:,
812834
element = input.at_xpath("//script[@id='#{id}']")
813835
raise JSON::LD::JsonLdError::LoadingDocumentFailed, "No script tag found with id=#{id}" unless element
814836

815-
unless element.attributes['type'].to_s.start_with?('application/ld+json')
837+
script_type = SCRIPT_LOADERS.keys.detect {|type| element.attributes['type'].to_s.start_with?(type)}
838+
unless script_type
816839
raise JSON::LD::JsonLdError::LoadingDocumentFailed,
817840
"Script tag has type=#{element.attributes['type']}"
818841
end
819842

820-
content = element.inner_html
821-
validate_input(content, url: url) if options[:validate]
822-
mj_opts = options.keep_if { |k, v| k != :adapter || MUTLI_JSON_ADAPTERS.include?(v) }
823-
MultiJson.load(content, **mj_opts)
843+
loader = SCRIPT_LOADERS[script_type]
844+
loader.call(element.inner_html, url: url, **options)
824845
elsif extractAllScripts
825846
res = []
826-
elements = if profile
827-
es = input.xpath("//script[starts-with(@type, 'application/ld+json;profile=#{profile}')]")
828-
# If no profile script, just take a single script without profile
829-
es = [input.at_xpath("//script[starts-with(@type, 'application/ld+json')]")].compact if es.empty?
830-
es
831-
else
832-
input.xpath("//script[starts-with(@type, 'application/ld+json')]")
833-
end
834-
elements.each do |element|
835-
content = element.inner_html
836-
validate_input(content, url: url) if options[:validate]
837-
mj_opts = options.keep_if { |k, v| k != :adapter || MUTLI_JSON_ADAPTERS.include?(v) }
838-
r = MultiJson.load(content, **mj_opts)
839-
if r.is_a?(Hash)
840-
res << r
841-
elsif r.is_a?(Array)
842-
res.concat(r)
847+
848+
SCRIPT_LOADERS.each do |type, loader|
849+
next unless res.empty? # Only load a single type
850+
elements = if profile
851+
es = input.xpath("//script[starts-with(@type, '#{type};profile=#{profile}')]")
852+
# If no profile script, just take a single script without profile
853+
es = [input.at_xpath("//script[starts-with(@type, '#{type}')]")].compact if es.empty?
854+
es
855+
else
856+
input.xpath("//script[starts-with(@type, '#{type}')]")
857+
end
858+
elements.each do |element|
859+
content = element.inner_html
860+
r = loader.call(content, url: url, extractAllScripts: true, **options)
861+
if r.is_a?(Hash)
862+
res << r
863+
elsif r.is_a?(Array)
864+
res.concat(r)
865+
end
843866
end
844867
end
845868
res
846869
else
847-
# Find the first script with type application/ld+json.
848-
element = input.at_xpath("//script[starts-with(@type, 'application/ld+json;profile=#{profile}')]") if profile
849-
element ||= input.at_xpath("//script[starts-with(@type, 'application/ld+json')]")
850-
raise JSON::LD::JsonLdError::LoadingDocumentFailed, "No script tag found" unless element
870+
# Find the first script with a known type
871+
script_type, element = nil, nil
872+
SCRIPT_LOADERS.keys.each do |type|
873+
next if script_type # already found the type
874+
element = input.at_xpath("//script[starts-with(@type, '#{type};profile=#{profile}')]") if profile
875+
element ||= input.at_xpath("//script[starts-with(@type, '#{type}')]")
876+
script_type = type if element
877+
end
878+
unless script_type
879+
raise JSON::LD::JsonLdError::LoadingDocumentFailed, "No script tag found" unless element
880+
end
851881

852882
content = element.inner_html
853-
validate_input(content, url: url) if options[:validate]
854-
mj_opts = options.keep_if { |k, v| k != :adapter || MUTLI_JSON_ADAPTERS.include?(v) }
855-
MultiJson.load(content, **mj_opts)
883+
SCRIPT_LOADERS[script_type].call(content, url: url, **options)
856884
end
857885
rescue MultiJson::ParseError => e
858886
raise JSON::LD::JsonLdError::InvalidScriptElement, e.message

0 commit comments

Comments
 (0)