@@ -758,6 +758,28 @@ class << self
758
758
alias fromRDF fromRdf
759
759
end
760
760
761
##
# Registry of recognized script content types, keyed by media type.
# Each value is a callable invoked with the raw script text, the
# document `url:` and any remaining loader options; it decodes the
# text into a hash or an array of hashes.
#
# @return [Hash{String => Proc}]
SCRIPT_LOADERS = {
  'application/ld+json' => lambda do |content, url:, **options|
    validate_input(content, url: url) if options[:validate]
    # Every option is handed on to MultiJson; :adapter is dropped only
    # when its value is not one of the known adapters.
    parser_options = options.reject do |key, value|
      key == :adapter && !MUTLI_JSON_ADAPTERS.include?(value)
    end
    MultiJson.load(content, **parser_options)
  end
}
773
+
774
##
# Registers a loader for script elements of a specific content type,
# replacing any loader previously registered under the same type.
#
# The type is stored as a String because it is later compared against
# the start of a script element's `type` attribute. The loader is
# invoked as `loader.call(content, url: url, **options)`.
#
# @param [#to_s] type
#   Content type to register; must not be empty, since an empty prefix
#   would match every script element.
# @param [#call] loader
#   Callable that decodes the script content.
# @return [#call] the registered loader
# @raise [ArgumentError]
#   if `loader` is not callable or `type` is empty
def self.add_script_loader(type, loader)
  # Fail at registration time rather than with a NoMethodError when a
  # document is eventually loaded.
  raise ArgumentError, 'loader must respond to #call' unless loader.respond_to?(:call)

  type = type.to_s
  raise ArgumentError, 'type must not be empty' if type.empty?

  SCRIPT_LOADERS[type] = loader
end
782
+
761
783
##
762
784
# Load one or more script tags from an HTML source.
763
785
# Unescapes and uncomments input, returns the internal representation
@@ -812,47 +834,53 @@ def self.load_html(input, url:,
812
834
element = input . at_xpath ( "//script[@id='#{ id } ']" )
813
835
raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found with id=#{ id } " unless element
814
836
815
- unless element . attributes [ 'type' ] . to_s . start_with? ( 'application/ld+json' )
837
+ script_type = SCRIPT_LOADERS . keys . detect { |type | element . attributes [ 'type' ] . to_s . start_with? ( type ) }
838
+ unless script_type
816
839
raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed ,
817
840
"Script tag has type=#{ element . attributes [ 'type' ] } "
818
841
end
819
842
820
- content = element . inner_html
821
- validate_input ( content , url : url ) if options [ :validate ]
822
- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
823
- MultiJson . load ( content , **mj_opts )
843
+ loader = SCRIPT_LOADERS [ script_type ]
844
+ loader . call ( element . inner_html , url : url , **options )
824
845
elsif extractAllScripts
825
846
res = [ ]
826
- elements = if profile
827
- es = input . xpath ( "//script[starts-with(@type, 'application/ld+json;profile=#{ profile } ')]" )
828
- # If no profile script, just take a single script without profile
829
- es = [ input . at_xpath ( "//script[starts-with(@type, 'application/ld+json')]" ) ] . compact if es . empty?
830
- es
831
- else
832
- input . xpath ( "//script[starts-with(@type, 'application/ld+json')]" )
833
- end
834
- elements . each do |element |
835
- content = element . inner_html
836
- validate_input ( content , url : url ) if options [ :validate ]
837
- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
838
- r = MultiJson . load ( content , **mj_opts )
839
- if r . is_a? ( Hash )
840
- res << r
841
- elsif r . is_a? ( Array )
842
- res . concat ( r )
847
+
848
+ SCRIPT_LOADERS . each do |type , loader |
849
+ next unless res . empty? # Only load a single type
850
+ elements = if profile
851
+ es = input . xpath ( "//script[starts-with(@type, '#{ type } ;profile=#{ profile } ')]" )
852
+ # If no profile script, just take a single script without profile
853
+ es = [ input . at_xpath ( "//script[starts-with(@type, '#{ type } ')]" ) ] . compact if es . empty?
854
+ es
855
+ else
856
+ input . xpath ( "//script[starts-with(@type, '#{ type } ')]" )
857
+ end
858
+ elements . each do |element |
859
+ content = element . inner_html
860
+ r = loader . call ( content , url : url , extractAllScripts : true , **options )
861
+ if r . is_a? ( Hash )
862
+ res << r
863
+ elsif r . is_a? ( Array )
864
+ res . concat ( r )
865
+ end
843
866
end
844
867
end
845
868
res
846
869
else
847
- # Find the first script with type application/ld+json.
848
- element = input . at_xpath ( "//script[starts-with(@type, 'application/ld+json;profile=#{ profile } ')]" ) if profile
849
- element ||= input . at_xpath ( "//script[starts-with(@type, 'application/ld+json')]" )
850
- raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found" unless element
870
+ # Find the first script with a known type
871
+ script_type , element = nil , nil
872
+ SCRIPT_LOADERS . keys . each do |type |
873
+ next if script_type # already found the type
874
+ element = input . at_xpath ( "//script[starts-with(@type, '#{ type } ;profile=#{ profile } ')]" ) if profile
875
+ element ||= input . at_xpath ( "//script[starts-with(@type, '#{ type } ')]" )
876
+ script_type = type if element
877
+ end
878
+ unless script_type
879
+ raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found" unless element
880
+ end
851
881
852
882
content = element . inner_html
853
- validate_input ( content , url : url ) if options [ :validate ]
854
- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
855
- MultiJson . load ( content , **mj_opts )
883
+ SCRIPT_LOADERS [ script_type ] . call ( content , url : url , **options )
856
884
end
857
885
rescue MultiJson ::ParseError => e
858
886
raise JSON ::LD ::JsonLdError ::InvalidScriptElement , e . message
0 commit comments