From edb304dff85a174afe81b977384afdea4d3b8fba Mon Sep 17 00:00:00 2001 From: Misko Hevery Date: Fri, 1 May 2015 16:12:53 -0700 Subject: [PATCH 1/2] chore(sanitize): new implementation of the html sanitized parser This implementation is based on inert document Closes #11442 Closes #11443 --- docs/content/error/$sanitize/badparse.ngdoc | 11 - docs/content/error/$sanitize/ddns.ngdoc | 10 + lib/htmlparser/htmlparser.js | 309 -------------------- src/ngSanitize/sanitize.js | 284 ++++++------------ test/ngSanitize/sanitizeSpec.js | 78 ++--- 5 files changed, 133 insertions(+), 559 deletions(-) delete mode 100644 docs/content/error/$sanitize/badparse.ngdoc create mode 100644 docs/content/error/$sanitize/ddns.ngdoc delete mode 100644 lib/htmlparser/htmlparser.js diff --git a/docs/content/error/$sanitize/badparse.ngdoc b/docs/content/error/$sanitize/badparse.ngdoc deleted file mode 100644 index d07c6d62a403..000000000000 --- a/docs/content/error/$sanitize/badparse.ngdoc +++ /dev/null @@ -1,11 +0,0 @@ -@ngdoc error -@name $sanitize:badparse -@fullName Parsing Error while Sanitizing -@description - -This error occurs when the HTML string passed to '$sanitize' can't be parsed by the sanitizer. -The error contains part of the html string that can't be parsed. - -The parser is more strict than a typical browser parser, so it's possible that some obscure input would produce this error despite the string being recognized as valid HTML by a browser. - -If a valid html code results in this error, please file a bug. diff --git a/docs/content/error/$sanitize/ddns.ngdoc b/docs/content/error/$sanitize/ddns.ngdoc new file mode 100644 index 000000000000..a1f6f77390c8 --- /dev/null +++ b/docs/content/error/$sanitize/ddns.ngdoc @@ -0,0 +1,10 @@ +@ngdoc error +@name $sanitize:ddns +@fullName DOMDocument not supported +@description + +This error occurs when `$sanitize` sanitizer determines that `DOMDocument` api is not supported by the current browser. + +This api is necessary for safe parsing of HTML strings into DOM trees and without it the sanitizer can't sanitize the input. + +The api is present in all supported browsers including IE 9.0, so the presence of this error usually indicates that Angular's `$sanitize` is being used on an unsupported platform. diff --git a/lib/htmlparser/htmlparser.js b/lib/htmlparser/htmlparser.js deleted file mode 100644 index 46a3da08313e..000000000000 --- a/lib/htmlparser/htmlparser.js +++ /dev/null @@ -1,309 +0,0 @@ -/* - * HTML Parser By John Resig (ejohn.org) - * Original code by Erik Arvidsson, Mozilla Public License - * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js - * - * // Use like so: - * htmlParser(htmlString, { - * start: function(tag, attrs, unary) {}, - * end: function(tag) {}, - * chars: function(text) {}, - * comment: function(text) {} - * }); - * - * // or to get an XML string: - * HTMLtoXML(htmlString); - * - * // or to get an XML DOM Document - * HTMLtoDOM(htmlString); - * - * // or to inject into an existing document/DOM node - * HTMLtoDOM(htmlString, document); - * HTMLtoDOM(htmlString, document.body); - * - */ - -(function(){ - - // Regular Expressions for parsing tags and attributes - var startTag = /^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/, - endTag = /^<\/(\w+)[^>]*>/, - attr = /(\w+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g; - - // Empty Elements - HTML 4.01 - var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed"); - - // Block Elements - HTML 4.01 - var block = makeMap("address,applet,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,form,frameset,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul"); - - // Inline Elements - HTML 4.01 - var inline = makeMap("a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var"); - - // Elements that you can, intentionally, leave open - // (and which close themselves) - var closeSelf = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr"); - - // Attributes that have their values filled in disabled="disabled" - var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); - - // Special Elements (can contain anything) - var special = makeMap("script,style"); - - var htmlParser = this.htmlParser = function( html, handler ) { - var index, chars, match, stack = [], last = html; - stack.last = function(){ - return this[ this.length - 1 ]; - }; - - while ( html ) { - chars = true; - - // Make sure we're not in a script or style element - if ( !stack.last() || !special[ stack.last() ] ) { - - // Comment - if ( html.indexOf(""); - - if ( index >= 0 ) { - if ( handler.comment ) - handler.comment( html.substring( 4, index ) ); - html = html.substring( index + 3 ); - chars = false; - } - - // end tag - } else if ( html.indexOf("]*>"), function(all, text){ - text = text.replace(//g, "$1") - .replace(//g, "$1"); - - if ( handler.chars ) - handler.chars( text ); - - return ""; - }); - - parseEndTag( "", stack.last() ); - } - - if ( html == last ) - throw "Parse Error: " + html; - last = html; - } - - // Clean up any remaining tags - parseEndTag(); - - function parseStartTag( tag, tagName, rest, unary ) { - if ( block[ tagName ] ) { - while ( stack.last() && inline[ stack.last() ] ) { - parseEndTag( "", stack.last() ); - } - } - - if ( closeSelf[ tagName ] && stack.last() == tagName ) { - parseEndTag( "", tagName ); - } - - unary = empty[ tagName ] || !!unary; - - if ( !unary ) - stack.push( tagName ); - - if ( handler.start ) { - var attrs = []; - - rest.replace(attr, function(match, name) { - var value = arguments[2] ? arguments[2] : - arguments[3] ? arguments[3] : - arguments[4] ? arguments[4] : - fillAttrs[name] ? name : ""; - - attrs.push({ - name: name, - value: value, - escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //" - }); - }); - - if ( handler.start ) - handler.start( tagName, attrs, unary ); - } - } - - function parseEndTag( tag, tagName ) { - // If no tag name is provided, clean shop - if ( !tagName ) - var pos = 0; - - // Find the closest opened tag of the same type - else - for ( var pos = stack.length - 1; pos >= 0; pos-- ) - if ( stack[ pos ] == tagName ) - break; - - if ( pos >= 0 ) { - // Close all the open elements, up the stack - for ( var i = stack.length - 1; i >= pos; i-- ) - if ( handler.end ) - handler.end( stack[ i ] ); - - // Remove the open elements from the stack - stack.length = pos; - } - } - }; - - this.HTMLtoXML = function( html ) { - var results = ""; - - htmlParser(html, { - start: function( tag, attrs, unary ) { - results += "<" + tag; - - for ( var i = 0; i < attrs.length; i++ ) - results += " " + attrs[i].name + '="' + attrs[i].escaped + '"'; - - results += (unary ? "/" : "") + ">"; - }, - end: function( tag ) { - results += ""; - }, - chars: function( text ) { - results += text; - }, - comment: function( text ) { - results += ""; - } - }); - - return results; - }; - - this.HTMLtoDOM = function( html, doc ) { - // There can be only one of these elements - var one = makeMap("html,head,body,title"); - - // Enforce a structure for the document - var structure = { - link: "head", - base: "head" - }; - - if ( !doc ) { - if ( typeof DOMDocument != "undefined" ) - doc = new DOMDocument(); - else if ( typeof document != "undefined" && document.implementation && document.implementation.createDocument ) - doc = document.implementation.createDocument("", "", null); - else if ( typeof ActiveX != "undefined" ) - doc = new ActiveXObject("Msxml.DOMDocument"); - - } else - doc = doc.ownerDocument || - doc.getOwnerDocument && doc.getOwnerDocument() || - doc; - - var elems = [], - documentElement = doc.documentElement || - doc.getDocumentElement && doc.getDocumentElement(); - - // If we're dealing with an empty document then we - // need to pre-populate it with the HTML document structure - if ( !documentElement && doc.createElement ) (function(){ - var html = doc.createElement("html"); - var head = doc.createElement("head"); - head.appendChild( doc.createElement("title") ); - html.appendChild( head ); - html.appendChild( doc.createElement("body") ); - doc.appendChild( html ); - })(); - - // Find all the unique elements - if ( doc.getElementsByTagName ) - for ( var i in one ) - one[ i ] = doc.getElementsByTagName( i )[0]; - - // If we're working with a document, inject contents into - // the body element - var curParentNode = one.body; - - htmlParser( html, { - start: function( tagName, attrs, unary ) { - // If it's a pre-built element, then we can ignore - // its construction - if ( one[ tagName ] ) { - curParentNode = one[ tagName ]; - return; - } - - var elem = doc.createElement( tagName ); - - for ( var attr in attrs ) - elem.setAttribute( attrs[ attr ].name, attrs[ attr ].value ); - - if ( structure[ tagName ] && typeof one[ structure[ tagName ] ] != "boolean" ) - one[ structure[ tagName ] ].appendChild( elem ); - - else if ( curParentNode && curParentNode.appendChild ) - curParentNode.appendChild( elem ); - - if ( !unary ) { - elems.push( elem ); - curParentNode = elem; - } - }, - end: function( tag ) { - elems.length -= 1; - - // Init the new parentNode - curParentNode = elems[ elems.length - 1 ]; - }, - chars: function( text ) { - curParentNode.appendChild( doc.createTextNode( text ) ); - }, - comment: function( text ) { - // create comment node - } - }); - - return doc; - }; - - function makeMap(str){ - var obj = {}, items = str.split(","); - for ( var i = 0; i < items.length; i++ ) - obj[ items[i] ] = true; - return obj; - } -})(); \ No newline at end of file diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index 4c7f615dc7d1..0d7011a822c7 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -28,23 +28,6 @@ var $sanitizeMinErr = angular.$$minErr('$sanitize'); * See {@link ngSanitize.$sanitize `$sanitize`} for usage. */ -/* - * HTML Parser By Misko Hevery (misko@hevery.com) - * based on: HTML Parser By John Resig (ejohn.org) - * Original code by Erik Arvidsson, Mozilla Public License - * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js - * - * // Use like so: - * htmlParser(htmlString, { - * start: function(tag, attrs, unary) {}, - * end: function(tag) {}, - * chars: function(text) {}, - * comment: function(text) {} - * }); - * - */ - - /** * @ngdoc service * @name $sanitize @@ -164,16 +147,7 @@ function sanitizeText(chars) { // Regular Expressions for parsing tags and attributes -var START_TAG_REGEXP = - /^<((?:[a-zA-Z])[\w:-]*)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*(>?)/, - END_TAG_REGEXP = /^<\/\s*([\w:-]+)[^>]*>/, - ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g, - BEGIN_TAG_REGEXP = /^/g, - DOCTYPE_REGEXP = /]*?)>/i, - CDATA_REGEXP = //g, - SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, +var SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, // Match everything outside of normal chars and " (quote character) NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; @@ -184,23 +158,23 @@ var START_TAG_REGEXP = // Safe Void Elements - HTML5 // http://dev.w3.org/html5/spec/Overview.html#void-elements -var voidElements = makeMap("area,br,col,hr,img,wbr"); +var voidElements = toMap("area,br,col,hr,img,wbr"); // Elements that you can, intentionally, leave open (and which close themselves) // http://dev.w3.org/html5/spec/Overview.html#optional-tags -var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"), - optionalEndTagInlineElements = makeMap("rp,rt"), +var optionalEndTagBlockElements = toMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"), + optionalEndTagInlineElements = toMap("rp,rt"), optionalEndTagElements = angular.extend({}, optionalEndTagInlineElements, optionalEndTagBlockElements); // Safe Block Elements - HTML5 -var blockElements = angular.extend({}, optionalEndTagBlockElements, makeMap("address,article," + +var blockElements = angular.extend({}, optionalEndTagBlockElements, toMap("address,article," + "aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5," + "h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul")); // Inline Elements - HTML5 -var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b," + +var inlineElements = angular.extend({}, optionalEndTagInlineElements, toMap("a,abbr,acronym,b," + "bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s," + "samp,small,span,strike,strong,sub,sup,time,tt,u,var")); @@ -208,12 +182,12 @@ var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a // https://wiki.whatwg.org/wiki/Sanitization_rules#svg_Elements // Note: the elements animate,animateColor,animateMotion,animateTransform,set are intentionally omitted. // They can potentially allow for arbitrary javascript to be executed. See #11290 -var svgElements = makeMap("circle,defs,desc,ellipse,font-face,font-face-name,font-face-src,g,glyph," + +var svgElements = toMap("circle,defs,desc,ellipse,font-face,font-face-name,font-face-src,g,glyph," + "hkern,image,linearGradient,line,marker,metadata,missing-glyph,mpath,path,polygon,polyline," + "radialGradient,rect,stop,svg,switch,text,title,tspan,use"); // Special Elements (can contain anything) -var specialElements = makeMap("script,style"); +var specialElements = toMap("script,style"); var validElements = angular.extend({}, voidElements, @@ -223,9 +197,9 @@ var validElements = angular.extend({}, svgElements); //Attributes that have href and hence need to be sanitized -var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap,xlink:href"); +var uriAttrs = toMap("background,cite,href,longdesc,src,usemap,xlink:href"); -var htmlAttrs = makeMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' + +var htmlAttrs = toMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' + 'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,' + 'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,' + 'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,' + @@ -233,7 +207,7 @@ var htmlAttrs = makeMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspac // SVG attributes (without "id" and "name" attributes) // https://wiki.whatwg.org/wiki/Sanitization_rules#svg_Attributes -var svgAttrs = makeMap('accent-height,accumulate,additive,alphabetic,arabic-form,ascent,' + +var svgAttrs = toMap('accent-height,accumulate,additive,alphabetic,arabic-form,ascent,' + 'baseProfile,bbox,begin,by,calcMode,cap-height,class,color,color-rendering,content,' + 'cx,cy,d,dx,dy,descent,display,dur,end,fill,fill-rule,font-family,font-size,font-stretch,' + 'font-style,font-variant,font-weight,from,fx,fy,g1,g2,glyph-name,gradientUnits,hanging,' + @@ -254,7 +228,7 @@ var validAttrs = angular.extend({}, svgAttrs, htmlAttrs); -function makeMap(str, lowercaseKeys) { +function toMap(str, lowercaseKeys) { var obj = {}, items = str.split(','), i; for (i = 0; i < items.length; i++) { obj[lowercaseKeys ? angular.lowercase(items[i]) : items[i]] = true; @@ -262,11 +236,34 @@ function makeMap(str, lowercaseKeys) { return obj; } +var baseNode; +(function(window) { + var doc; + if (window.DOMDocument) { + doc = new window.DOMDocument(); + } else if (window.document && window.document.implementation) { + doc = window.document.implementation.createHTMLDocument("inert"); + } else if (window.ActiveXObject) { + doc = new window.ActiveXObject("Msxml.DOMDocument"); + } else { + throw $sanitizeMinErr('ddns', "DOMDocument not supported"); + } + var docElement = doc.documentElement || doc.getDocumentElement(); + var bodyElements = docElement.getElementsByTagName('body'); + if (bodyElements.length === 1) { + baseNode = bodyElements[0]; + } else { + var html = doc.createElement('html'); + baseNode = doc.createElement('body'); + html.appendChild(baseNode); + doc.appendChild(html); + } +})(window); /** * @example * htmlParser(htmlString, { - * start: function(tag, attrs, unary) {}, + * start: function(tag, attrs) {}, * end: function(tag) {}, * chars: function(text) {}, * comment: function(text) {} @@ -276,153 +273,60 @@ function makeMap(str, lowercaseKeys) { * @param {object} handler */ function htmlParser(html, handler) { - if (typeof html !== 'string') { - if (html === null || typeof html === 'undefined') { - html = ''; - } else { - html = '' + html; - } + if (html === null || html === undefined) { + html = ''; + } else if (typeof html !== 'string') { + html = '' + html; } - var index, chars, match, stack = [], last = html, text; - stack.last = function() { return stack[stack.length - 1]; }; - - while (html) { - text = ''; - chars = true; - - // Make sure we're not in a script or style element - if (!stack.last() || !specialElements[stack.last()]) { - - // Comment - if (html.indexOf("", index) === index) { - if (handler.comment) handler.comment(html.substring(4, index)); - html = html.substring(index + 3); - chars = false; - } - // DOCTYPE - } else if (DOCTYPE_REGEXP.test(html)) { - match = html.match(DOCTYPE_REGEXP); - - if (match) { - html = html.replace(match[0], ''); - chars = false; + baseNode.innerHTML = html; + var node = baseNode.firstChild; + while (node) { + switch (node.nodeType) { + case 1: // ELEMENT_NODE + handler.start(node.nodeName.toLowerCase(), attrToMap(node.attributes)); + break; + case 3: // TEXT NODE + handler.chars(node.textContent); + break; + case 8: // COMMENT NODE + handler.comment(node.textContent); + break; + } + var nextNode; + if (!(nextNode = node.firstChild)) { + if (nextNode = node.nextSibling) { + if (node.nodeType == 1) { + handler.end(node.nodeName.toLowerCase()); } - // end tag - } else if (BEGING_END_TAGE_REGEXP.test(html)) { - match = html.match(END_TAG_REGEXP); - - if (match) { - html = html.substring(match[0].length); - match[0].replace(END_TAG_REGEXP, parseEndTag); - chars = false; + } else { + if (node.nodeType == 1) { + handler.end(node.nodeName.toLowerCase()); } - - // start tag - } else if (BEGIN_TAG_REGEXP.test(html)) { - match = html.match(START_TAG_REGEXP); - - if (match) { - // We only have a valid start-tag if there is a '>'. - if (match[4]) { - html = html.substring(match[0].length); - match[0].replace(START_TAG_REGEXP, parseStartTag); + while (nextNode == null) { + node = node.parentNode; + if (node === baseNode) break; + nextNode = node.nextSibling; + if (node.nodeType == 1) { + handler.end(node.nodeName.toLowerCase()); } - chars = false; - } else { - // no ending tag found --- this piece should be encoded as an entity. - text += '<'; - html = html.substring(1); } } - - if (chars) { - index = html.indexOf("<"); - - text += index < 0 ? html : html.substring(0, index); - html = index < 0 ? "" : html.substring(index); - - if (handler.chars) handler.chars(decodeEntities(text)); - } - - } else { - // IE versions 9 and 10 do not understand the regex '[^]', so using a workaround with [\W\w]. - html = html.replace(new RegExp("([\\W\\w]*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'), - function(all, text) { - text = text.replace(COMMENT_REGEXP, "$1").replace(CDATA_REGEXP, "$1"); - - if (handler.chars) handler.chars(decodeEntities(text)); - - return ""; - }); - - parseEndTag("", stack.last()); } - - if (html == last) { - throw $sanitizeMinErr('badparse', "The sanitizer was unable to parse the following block " + - "of html: {0}", html); - } - last = html; + node = nextNode; } - // Clean up any remaining tags - parseEndTag(); - - function parseStartTag(tag, tagName, rest, unary) { - tagName = angular.lowercase(tagName); - if (blockElements[tagName]) { - while (stack.last() && inlineElements[stack.last()]) { - parseEndTag("", stack.last()); - } - } - - if (optionalEndTagElements[tagName] && stack.last() == tagName) { - parseEndTag("", tagName); - } - - unary = voidElements[tagName] || !!unary; - - if (!unary) { - stack.push(tagName); - } - - var attrs = {}; - - rest.replace(ATTR_REGEXP, - function(match, name, doubleQuotedValue, singleQuotedValue, unquotedValue) { - var value = doubleQuotedValue - || singleQuotedValue - || unquotedValue - || ''; - - attrs[name] = decodeEntities(value); - }); - if (handler.start) handler.start(tagName, attrs, unary); + while (node = baseNode.firstChild) { + baseNode.removeChild(node); } +} - function parseEndTag(tag, tagName) { - var pos = 0, i; - tagName = angular.lowercase(tagName); - if (tagName) { - // Find the closest opened tag of the same type - for (pos = stack.length - 1; pos >= 0; pos--) { - if (stack[pos] == tagName) break; - } - } - - if (pos >= 0) { - // Close all the open elements, up the stack - for (i = stack.length - 1; i >= pos; i--) - if (handler.end) handler.end(stack[i]); - - // Remove the open elements from the stack - stack.length = pos; - } +function attrToMap(attrs) { + var map = {}; + for (var i = 0, ii = attrs.length; i < ii; i++) { + var attr = attrs[i]; + map[attr.name] = attr.value; } + return map; } var hiddenPre=document.createElement("pre"); @@ -466,7 +370,7 @@ function encodeEntities(value) { * create an HTML/XML writer which writes to buffer * @param {Array} buf use buf.jain('') to get out sanitized html string * @returns {object} in the form of { - * start: function(tag, attrs, unary) {}, + * start: function(tag, attrs) {}, * end: function(tag) {}, * chars: function(text) {}, * comment: function(text) {} @@ -476,7 +380,7 @@ function htmlSanitizeWriter(buf, uriValidator) { var ignore = false; var out = angular.bind(buf, buf.push); return { - start: function(tag, attrs, unary) { + start: function(tag, attrs) { tag = angular.lowercase(tag); if (!ignore && specialElements[tag]) { ignore = tag; @@ -496,25 +400,25 @@ function htmlSanitizeWriter(buf, uriValidator) { out('"'); } }); - out(unary ? '/>' : '>'); + out('>'); } }, end: function(tag) { - tag = angular.lowercase(tag); - if (!ignore && validElements[tag] === true) { - out(''); - } - if (tag == ignore) { - ignore = false; - } - }, + tag = angular.lowercase(tag); + if (!ignore && validElements[tag] === true) { + out(''); + } + if (tag == ignore) { + ignore = false; + } + }, chars: function(chars) { - if (!ignore) { - out(encodeEntities(chars)); - } + if (!ignore) { + out(encodeEntities(chars)); } + } }; } diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index 33d036c97efc..068991e72282 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -22,18 +22,21 @@ describe('HTML', function() { var handler, start, text, comment; beforeEach(function() { text = ""; + start = null; handler = { - start: function(tag, attrs, unary) { + start: function(tag, attrs) { start = { tag: tag, - attrs: attrs, - unary: unary + attrs: attrs }; // Since different browsers handle newlines differently we trim // so that it is easier to write tests. - angular.forEach(attrs, function(value, key) { + for (var i = 0, ii = attrs.length; i < ii; i++) { + var keyValue = attrs[i]; + var key = keyValue.key; + var value = keyValue.value; attrs[key] = value.replace(/^\s*/, '').replace(/\s*$/, ''); - }); + } }, chars: function(text_) { text += text_; @@ -52,33 +55,9 @@ describe('HTML', function() { expect(comment).toEqual('FOOBAR'); }); - it('should throw an exception for invalid comments', function() { - var caught=false; - try { - htmlParser('', handler); - } - catch (ex) { - caught = true; - // expected an exception due to a bad parse - } - expect(caught).toBe(true); - }); - - it('double-dashes are not allowed in a comment', function() { - var caught=false; - try { - htmlParser('', handler); - } - catch (ex) { - caught = true; - // expected an exception due to a bad parse - } - expect(caught).toBe(true); - }); - it('should parse basic format', function() { htmlParser('text', handler); - expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); + expect(start).toEqual({tag:'tag', attrs:{attr:'value'}}); expect(text).toEqual('text'); }); @@ -88,15 +67,15 @@ describe('HTML', function() { }); it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() { - expect(function() { - htmlParser('foo "', function() { - expect(function() { - htmlParser('foo text', handler); - expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); + expect(start).toEqual({tag:'tag', attrs:{attr:'value'}}); expect(text).toEqual('text'); }); it('should parse newlines in attributes', function() { htmlParser('text', handler); - expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); + expect(start).toEqual({tag:'tag', attrs:{attr:'\nvalue\n'}}); expect(text).toEqual('text'); }); it('should parse namespace', function() { htmlParser('text', handler); - expect(start).toEqual({tag:'ns:t-a-g', attrs:{'ns:a-t-t-r':'value'}, unary:false}); + expect(start).toEqual({tag:'ns:t-a-g', attrs:{'ns:a-t-t-r':'\nvalue\n'}}); expect(text).toEqual('text'); }); it('should parse empty value attribute of node', function() { htmlParser('', handler); - expect(start).toEqual({tag:'option', attrs:{selected:'', value:''}, unary:false}); + expect(start).toEqual({tag:'option', attrs:{selected:'', value:''}}); expect(text).toEqual('abc'); }); }); @@ -137,11 +116,12 @@ describe('HTML', function() { }); it('should remove script', function() { - expectHTML('ac.').toEqual('ac.'); }); it('should remove script that has newline characters', function() { - expectHTML('a\n\revil\n\r< / scrIpt\n >c.').toEqual('ac.'); + expectHTML('a\n\revil\n\rc.').toEqual('ac.'); }); it('should remove DOCTYPE header', function() { @@ -173,7 +153,7 @@ describe('HTML', function() { }); it('should remove double nested script', function() { - expectHTML('ailc.').toEqual('ac.'); + expectHTML('ailc.').toEqual('ailc.'); }); it('should remove unknown names', function() { @@ -185,7 +165,7 @@ describe('HTML', function() { }); it('should handle self closed elements', function() { - expectHTML('a
c').toEqual('a
c'); + expectHTML('a
c').toEqual('a
c'); }); it('should handle namespace', function() { @@ -212,7 +192,7 @@ describe('HTML', function() { it('should ignore back slash as escape', function() { expectHTML('xxx\\'). - toEqual('xxx\\'); + toEqual('xxx\\'); }); it('should ignore object attributes', function() { @@ -247,8 +227,8 @@ describe('HTML', function() { }); it('should accept SVG tags', function() { - expectHTML('') - .toEqual(''); + expectHTML('') + .toEqual(''); }); it('should not ignore white-listed svg camelCased attributes', function() { @@ -435,11 +415,11 @@ describe('HTML', function() { inject(function() { $$sanitizeUri.andReturn('someUri'); - expectHTML('').toEqual(''); + expectHTML('').toEqual(''); expect($$sanitizeUri).toHaveBeenCalledWith('someUri', true); $$sanitizeUri.andReturn('unsafe:someUri'); - expectHTML('').toEqual(''); + expectHTML('').toEqual(''); }); }); From 0b39333b150dde8bc9a2ccddff20f27d42a1d574 Mon Sep 17 00:00:00 2001 From: Lucas Galfaso Date: Fri, 31 Jul 2015 22:48:08 +0200 Subject: [PATCH 2/2] fix(sanitize): Do not duplicate self closing elements For HTML self closing elements: * Do not create an end element * Create the element as self closed --- src/ngSanitize/sanitize.js | 21 +++++++++++++-------- test/ngSanitize/filter/linkySpec.js | 4 ++-- test/ngSanitize/sanitizeSpec.js | 18 ++++++++++-------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index 0d7011a822c7..a102b4108ee6 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -189,6 +189,9 @@ var svgElements = toMap("circle,defs,desc,ellipse,font-face,font-face-name,font- // Special Elements (can contain anything) var specialElements = toMap("script,style"); +var selfClosingElements = toMap("area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source," + + "track,wbr"); + var validElements = angular.extend({}, voidElements, blockElements, @@ -283,7 +286,8 @@ function htmlParser(html, handler) { while (node) { switch (node.nodeType) { case 1: // ELEMENT_NODE - handler.start(node.nodeName.toLowerCase(), attrToMap(node.attributes)); + handler.start(node.nodeName.toLowerCase(), attrToMap(node.attributes), + selfClosingElements[node.nodeName.toLowerCase()]); break; case 3: // TEXT NODE handler.chars(node.textContent); @@ -295,18 +299,18 @@ function htmlParser(html, handler) { var nextNode; if (!(nextNode = node.firstChild)) { if (nextNode = node.nextSibling) { - if (node.nodeType == 1) { + if (node.nodeType == 1 && !selfClosingElements[node.nodeName.toLowerCase()]) { handler.end(node.nodeName.toLowerCase()); } } else { - if (node.nodeType == 1) { + if (node.nodeType == 1 && !selfClosingElements[node.nodeName.toLowerCase()]) { handler.end(node.nodeName.toLowerCase()); } while (nextNode == null) { node = node.parentNode; if (node === baseNode) break; nextNode = node.nextSibling; - if (node.nodeType == 1) { + if (node.nodeType == 1 && !selfClosingElements[node.nodeName.toLowerCase()]) { handler.end(node.nodeName.toLowerCase()); } } @@ -370,7 +374,7 @@ function encodeEntities(value) { * create an HTML/XML writer which writes to buffer * @param {Array} buf use buf.jain('') to get out sanitized html string * @returns {object} in the form of { - * start: function(tag, attrs) {}, + * start: function(tag, attrs, selfClose) {}, * end: function(tag) {}, * chars: function(text) {}, * comment: function(text) {} @@ -380,7 +384,7 @@ function htmlSanitizeWriter(buf, uriValidator) { var ignore = false; var out = angular.bind(buf, buf.push); return { - start: function(tag, attrs) { + start: function(tag, attrs, selfClose) { tag = angular.lowercase(tag); if (!ignore && specialElements[tag]) { ignore = tag; @@ -388,7 +392,8 @@ function htmlSanitizeWriter(buf, uriValidator) { if (!ignore && validElements[tag] === true) { out('<'); out(tag); - angular.forEach(attrs, function(value, key) { + angular.forEach(Object.keys(attrs).sort(), function(key) { + var value = attrs[key]; var lkey=angular.lowercase(key); var isImage = (tag === 'img' && lkey === 'src') || (lkey === 'background'); if (validAttrs[lkey] === true && @@ -400,7 +405,7 @@ function htmlSanitizeWriter(buf, uriValidator) { out('"'); } }); - out('>'); + out(selfClose ? '/>' : '>'); } }, end: function(tag) { diff --git a/test/ngSanitize/filter/linkySpec.js b/test/ngSanitize/filter/linkySpec.js index f0c4f3956e54..74cdd25596e7 100644 --- a/test/ngSanitize/filter/linkySpec.js +++ b/test/ngSanitize/filter/linkySpec.js @@ -50,8 +50,8 @@ describe('linky', function() { it('should handle target:', function() { expect(linky("http://example.com", "_blank")). - toEqual('
http://example.com'); + toEqual('http://example.com'); expect(linky("http://example.com", "someNamedIFrame")). - toEqual('http://example.com'); + toEqual('http://example.com'); }); }); diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index 068991e72282..8490bf828b4b 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -112,7 +112,7 @@ describe('HTML', function() { // THESE TESTS ARE EXECUTED WITH COMPILED ANGULAR it('should echo html', function() { expectHTML('helloworld.'). - toEqual('helloworld.'); + toEqual('helloworld.'); }); it('should remove script', function() { @@ -165,7 +165,9 @@ describe('HTML', function() { }); it('should handle self closed elements', function() { - expectHTML('a
c').toEqual('a
c'); + expectHTML('a
c').toEqual('a
c'); + expectHTML('
').toEqual('
'); + expectHTML('
').toEqual('
'); }); it('should handle namespace', function() { @@ -192,7 +194,7 @@ describe('HTML', function() { it('should ignore back slash as escape', function() { expectHTML('xxx\\'). - toEqual('xxx\\'); + toEqual('xxx\\'); }); it('should ignore object attributes', function() { @@ -227,8 +229,8 @@ describe('HTML', function() { }); it('should accept SVG tags', function() { - expectHTML('') - .toEqual(''); + expectHTML('') + .toEqual(''); }); it('should not ignore white-listed svg camelCased attributes', function() { @@ -259,7 +261,7 @@ describe('HTML', function() { expectHTML('' + '') - .toEqual(''); + .toEqual(''); }); describe('htmlSanitizerWriter', function() { @@ -415,11 +417,11 @@ describe('HTML', function() { inject(function() { $$sanitizeUri.andReturn('someUri'); - expectHTML('').toEqual(''); + expectHTML('').toEqual(''); expect($$sanitizeUri).toHaveBeenCalledWith('someUri', true); $$sanitizeUri.andReturn('unsafe:someUri'); - expectHTML('').toEqual(''); + expectHTML('').toEqual(''); }); });