From edb304dff85a174afe81b977384afdea4d3b8fba Mon Sep 17 00:00:00 2001
From: Misko Hevery <misko@hevery.com>
Date: Fri, 1 May 2015 16:12:53 -0700
Subject: [PATCH 1/2] chore(sanitize): new implementation of the html sanitized
 parser

This implementation is based on inert document

Closes #11442
Closes #11443
---
 docs/content/error/$sanitize/badparse.ngdoc |  11 -
 docs/content/error/$sanitize/ddns.ngdoc     |  10 +
 lib/htmlparser/htmlparser.js                | 309 --------------------
 src/ngSanitize/sanitize.js                  | 284 ++++++------------
 test/ngSanitize/sanitizeSpec.js             |  78 ++---
 5 files changed, 133 insertions(+), 559 deletions(-)
 delete mode 100644 docs/content/error/$sanitize/badparse.ngdoc
 create mode 100644 docs/content/error/$sanitize/ddns.ngdoc
 delete mode 100644 lib/htmlparser/htmlparser.js

diff --git a/docs/content/error/$sanitize/badparse.ngdoc b/docs/content/error/$sanitize/badparse.ngdoc
deleted file mode 100644
index d07c6d62a403..000000000000
--- a/docs/content/error/$sanitize/badparse.ngdoc
+++ /dev/null
@@ -1,11 +0,0 @@
-@ngdoc error
-@name $sanitize:badparse
-@fullName Parsing Error while Sanitizing
-@description
-
-This error occurs when the HTML string passed to '$sanitize' can't be parsed by the sanitizer.
-The error contains part of the html string that can't be parsed.
-
-The parser is more strict than a typical browser parser, so it's possible that some obscure input would produce this error despite the string being recognized as valid HTML by a browser.
-
-If a valid html code results in this error, please file a bug.
diff --git a/docs/content/error/$sanitize/ddns.ngdoc b/docs/content/error/$sanitize/ddns.ngdoc
new file mode 100644
index 000000000000..a1f6f77390c8
--- /dev/null
+++ b/docs/content/error/$sanitize/ddns.ngdoc
@@ -0,0 +1,10 @@
+@ngdoc error
+@name $sanitize:ddns
+@fullName DOMDocument not supported
+@description
+
+This error occurs when `$sanitize` sanitizer determines that `DOMDocument` api is not supported by the current browser.
+
+This api is necessary for safe parsing of HTML strings into DOM trees and without it the sanitizer can't sanitize the input.
+
+The api is present in all supported browsers including IE 9.0, so the presence of this error usually indicates that Angular's `$sanitize` is being used on an unsupported platform.
diff --git a/lib/htmlparser/htmlparser.js b/lib/htmlparser/htmlparser.js
deleted file mode 100644
index 46a3da08313e..000000000000
--- a/lib/htmlparser/htmlparser.js
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * HTML Parser By John Resig (ejohn.org)
- * Original code by Erik Arvidsson, Mozilla Public License
- * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
- *
- * // Use like so:
- * htmlParser(htmlString, {
- *     start: function(tag, attrs, unary) {},
- *     end: function(tag) {},
- *     chars: function(text) {},
- *     comment: function(text) {}
- * });
- *
- * // or to get an XML string:
- * HTMLtoXML(htmlString);
- *
- * // or to get an XML DOM Document
- * HTMLtoDOM(htmlString);
- *
- * // or to inject into an existing document/DOM node
- * HTMLtoDOM(htmlString, document);
- * HTMLtoDOM(htmlString, document.body);
- *
- */
-
-(function(){
-
-  // Regular Expressions for parsing tags and attributes
-  var startTag = /^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/,
-    endTag = /^<\/(\w+)[^>]*>/,
-    attr = /(\w+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g;
-
-  // Empty Elements - HTML 4.01
-  var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed");
-
-  // Block Elements - HTML 4.01
-  var block = makeMap("address,applet,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,form,frameset,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul");
-
-  // Inline Elements - HTML 4.01
-  var inline = makeMap("a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var");
-
-  // Elements that you can, intentionally, leave open
-  // (and which close themselves)
-  var closeSelf = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr");
-
-  // Attributes that have their values filled in disabled="disabled"
-  var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");
-
-  // Special Elements (can contain anything)
-  var special = makeMap("script,style");
-
-  var htmlParser = this.htmlParser = function( html, handler ) {
-    var index, chars, match, stack = [], last = html;
-    stack.last = function(){
-      return this[ this.length - 1 ];
-    };
-
-    while ( html ) {
-      chars = true;
-
-      // Make sure we're not in a script or style element
-      if ( !stack.last() || !special[ stack.last() ] ) {
-
-        // Comment
-        if ( html.indexOf("<!--") == 0 ) {
-          index = html.indexOf("-->");
-
-          if ( index >= 0 ) {
-            if ( handler.comment )
-              handler.comment( html.substring( 4, index ) );
-            html = html.substring( index + 3 );
-            chars = false;
-          }
-
-        // end tag
-        } else if ( html.indexOf("</") == 0 ) {
-          match = html.match( endTag );
-
-          if ( match ) {
-            html = html.substring( match[0].length );
-            match[0].replace( endTag, parseEndTag );
-            chars = false;
-          }
-
-        // start tag
-        } else if ( html.indexOf("<") == 0 ) {
-          match = html.match( startTag );
-
-          if ( match ) {
-            html = html.substring( match[0].length );
-            match[0].replace( startTag, parseStartTag );
-            chars = false;
-          }
-        }
-
-        if ( chars ) {
-          index = html.indexOf("<");
-
-          var text = index < 0 ? html : html.substring( 0, index );
-          html = index < 0 ? "" : html.substring( index );
-
-          if ( handler.chars )
-            handler.chars( text );
-        }
-
-      } else {
-        html = html.replace(new RegExp("(.*)<\/" + stack.last() + "[^>]*>"), function(all, text){
-          text = text.replace(/<!--(.*?)-->/g, "$1")
-            .replace(/<!\[CDATA\[(.*?)]]>/g, "$1");
-
-          if ( handler.chars )
-            handler.chars( text );
-
-          return "";
-        });
-
-        parseEndTag( "", stack.last() );
-      }
-
-      if ( html == last )
-        throw "Parse Error: " + html;
-      last = html;
-    }
-
-    // Clean up any remaining tags
-    parseEndTag();
-
-    function parseStartTag( tag, tagName, rest, unary ) {
-      if ( block[ tagName ] ) {
-        while ( stack.last() && inline[ stack.last() ] ) {
-          parseEndTag( "", stack.last() );
-        }
-      }
-
-      if ( closeSelf[ tagName ] && stack.last() == tagName ) {
-        parseEndTag( "", tagName );
-      }
-
-      unary = empty[ tagName ] || !!unary;
-
-      if ( !unary )
-        stack.push( tagName );
-
-      if ( handler.start ) {
-        var attrs = [];
-
-        rest.replace(attr, function(match, name) {
-          var value = arguments[2] ? arguments[2] :
-            arguments[3] ? arguments[3] :
-            arguments[4] ? arguments[4] :
-            fillAttrs[name] ? name : "";
-
-          attrs.push({
-            name: name,
-            value: value,
-            escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //"
-          });
-        });
-
-        if ( handler.start )
-          handler.start( tagName, attrs, unary );
-      }
-    }
-
-    function parseEndTag( tag, tagName ) {
-      // If no tag name is provided, clean shop
-      if ( !tagName )
-        var pos = 0;
-
-      // Find the closest opened tag of the same type
-      else
-        for ( var pos = stack.length - 1; pos >= 0; pos-- )
-          if ( stack[ pos ] == tagName )
-            break;
-
-      if ( pos >= 0 ) {
-        // Close all the open elements, up the stack
-        for ( var i = stack.length - 1; i >= pos; i-- )
-          if ( handler.end )
-            handler.end( stack[ i ] );
-
-        // Remove the open elements from the stack
-        stack.length = pos;
-      }
-    }
-  };
-
-  this.HTMLtoXML = function( html ) {
-    var results = "";
-
-    htmlParser(html, {
-      start: function( tag, attrs, unary ) {
-        results += "<" + tag;
-
-        for ( var i = 0; i < attrs.length; i++ )
-          results += " " + attrs[i].name + '="' + attrs[i].escaped + '"';
-
-        results += (unary ? "/" : "") + ">";
-      },
-      end: function( tag ) {
-        results += "</" + tag + ">";
-      },
-      chars: function( text ) {
-        results += text;
-      },
-      comment: function( text ) {
-        results += "<!--" + text + "-->";
-      }
-    });
-
-    return results;
-  };
-
-  this.HTMLtoDOM = function( html, doc ) {
-    // There can be only one of these elements
-    var one = makeMap("html,head,body,title");
-
-    // Enforce a structure for the document
-    var structure = {
-      link: "head",
-      base: "head"
-    };
-
-    if ( !doc ) {
-      if ( typeof DOMDocument != "undefined" )
-        doc = new DOMDocument();
-      else if ( typeof document != "undefined" && document.implementation && document.implementation.createDocument )
-        doc = document.implementation.createDocument("", "", null);
-      else if ( typeof ActiveX != "undefined" )
-        doc = new ActiveXObject("Msxml.DOMDocument");
-
-    } else
-      doc = doc.ownerDocument ||
-        doc.getOwnerDocument && doc.getOwnerDocument() ||
-        doc;
-
-    var elems = [],
-      documentElement = doc.documentElement ||
-        doc.getDocumentElement && doc.getDocumentElement();
-
-    // If we're dealing with an empty document then we
-    // need to pre-populate it with the HTML document structure
-    if ( !documentElement && doc.createElement ) (function(){
-      var html = doc.createElement("html");
-      var head = doc.createElement("head");
-      head.appendChild( doc.createElement("title") );
-      html.appendChild( head );
-      html.appendChild( doc.createElement("body") );
-      doc.appendChild( html );
-    })();
-
-    // Find all the unique elements
-    if ( doc.getElementsByTagName )
-      for ( var i in one )
-        one[ i ] = doc.getElementsByTagName( i )[0];
-
-    // If we're working with a document, inject contents into
-    // the body element
-    var curParentNode = one.body;
-
-    htmlParser( html, {
-      start: function( tagName, attrs, unary ) {
-        // If it's a pre-built element, then we can ignore
-        // its construction
-        if ( one[ tagName ] ) {
-          curParentNode = one[ tagName ];
-          return;
-        }
-
-        var elem = doc.createElement( tagName );
-
-        for ( var attr in attrs )
-          elem.setAttribute( attrs[ attr ].name, attrs[ attr ].value );
-
-        if ( structure[ tagName ] && typeof one[ structure[ tagName ] ] != "boolean" )
-          one[ structure[ tagName ] ].appendChild( elem );
-
-        else if ( curParentNode && curParentNode.appendChild )
-          curParentNode.appendChild( elem );
-
-        if ( !unary ) {
-          elems.push( elem );
-          curParentNode = elem;
-        }
-      },
-      end: function( tag ) {
-        elems.length -= 1;
-
-        // Init the new parentNode
-        curParentNode = elems[ elems.length - 1 ];
-      },
-      chars: function( text ) {
-        curParentNode.appendChild( doc.createTextNode( text ) );
-      },
-      comment: function( text ) {
-        // create comment node
-      }
-    });
-
-    return doc;
-  };
-
-  function makeMap(str){
-    var obj = {}, items = str.split(",");
-    for ( var i = 0; i < items.length; i++ )
-      obj[ items[i] ] = true;
-    return obj;
-  }
-})();
\ No newline at end of file
diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index 4c7f615dc7d1..0d7011a822c7 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -28,23 +28,6 @@ var $sanitizeMinErr = angular.$$minErr('$sanitize');
  * See {@link ngSanitize.$sanitize `$sanitize`} for usage.
  */
 
-/*
- * HTML Parser By Misko Hevery (misko@hevery.com)
- * based on:  HTML Parser By John Resig (ejohn.org)
- * Original code by Erik Arvidsson, Mozilla Public License
- * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
- *
- * // Use like so:
- * htmlParser(htmlString, {
- *     start: function(tag, attrs, unary) {},
- *     end: function(tag) {},
- *     chars: function(text) {},
- *     comment: function(text) {}
- * });
- *
- */
-
-
 /**
  * @ngdoc service
  * @name $sanitize
@@ -164,16 +147,7 @@ function sanitizeText(chars) {
 
 
 // Regular Expressions for parsing tags and attributes
-var START_TAG_REGEXP =
-       /^<((?:[a-zA-Z])[\w:-]*)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*(>?)/,
-  END_TAG_REGEXP = /^<\/\s*([\w:-]+)[^>]*>/,
-  ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
-  BEGIN_TAG_REGEXP = /^</,
-  BEGING_END_TAGE_REGEXP = /^<\//,
-  COMMENT_REGEXP = /<!--(.*?)-->/g,
-  DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
-  CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
-  SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
+var SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
   // Match everything outside of normal chars and " (quote character)
   NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
 
@@ -184,23 +158,23 @@ var START_TAG_REGEXP =
 
 // Safe Void Elements - HTML5
 // http://dev.w3.org/html5/spec/Overview.html#void-elements
-var voidElements = makeMap("area,br,col,hr,img,wbr");
+var voidElements = toMap("area,br,col,hr,img,wbr");
 
 // Elements that you can, intentionally, leave open (and which close themselves)
 // http://dev.w3.org/html5/spec/Overview.html#optional-tags
-var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
-    optionalEndTagInlineElements = makeMap("rp,rt"),
+var optionalEndTagBlockElements = toMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
+    optionalEndTagInlineElements = toMap("rp,rt"),
     optionalEndTagElements = angular.extend({},
                                             optionalEndTagInlineElements,
                                             optionalEndTagBlockElements);
 
 // Safe Block Elements - HTML5
-var blockElements = angular.extend({}, optionalEndTagBlockElements, makeMap("address,article," +
+var blockElements = angular.extend({}, optionalEndTagBlockElements, toMap("address,article," +
         "aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5," +
         "h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul"));
 
 // Inline Elements - HTML5
-var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b," +
+var inlineElements = angular.extend({}, optionalEndTagInlineElements, toMap("a,abbr,acronym,b," +
         "bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s," +
         "samp,small,span,strike,strong,sub,sup,time,tt,u,var"));
 
@@ -208,12 +182,12 @@ var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a
 // https://wiki.whatwg.org/wiki/Sanitization_rules#svg_Elements
 // Note: the elements animate,animateColor,animateMotion,animateTransform,set are intentionally omitted.
 // They can potentially allow for arbitrary javascript to be executed. See #11290
-var svgElements = makeMap("circle,defs,desc,ellipse,font-face,font-face-name,font-face-src,g,glyph," +
+var svgElements = toMap("circle,defs,desc,ellipse,font-face,font-face-name,font-face-src,g,glyph," +
         "hkern,image,linearGradient,line,marker,metadata,missing-glyph,mpath,path,polygon,polyline," +
         "radialGradient,rect,stop,svg,switch,text,title,tspan,use");
 
 // Special Elements (can contain anything)
-var specialElements = makeMap("script,style");
+var specialElements = toMap("script,style");
 
 var validElements = angular.extend({},
                                    voidElements,
@@ -223,9 +197,9 @@ var validElements = angular.extend({},
                                    svgElements);
 
 //Attributes that have href and hence need to be sanitized
-var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap,xlink:href");
+var uriAttrs = toMap("background,cite,href,longdesc,src,usemap,xlink:href");
 
-var htmlAttrs = makeMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' +
+var htmlAttrs = toMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' +
     'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,' +
     'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,' +
     'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,' +
@@ -233,7 +207,7 @@ var htmlAttrs = makeMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspac
 
 // SVG attributes (without "id" and "name" attributes)
 // https://wiki.whatwg.org/wiki/Sanitization_rules#svg_Attributes
-var svgAttrs = makeMap('accent-height,accumulate,additive,alphabetic,arabic-form,ascent,' +
+var svgAttrs = toMap('accent-height,accumulate,additive,alphabetic,arabic-form,ascent,' +
     'baseProfile,bbox,begin,by,calcMode,cap-height,class,color,color-rendering,content,' +
     'cx,cy,d,dx,dy,descent,display,dur,end,fill,fill-rule,font-family,font-size,font-stretch,' +
     'font-style,font-variant,font-weight,from,fx,fy,g1,g2,glyph-name,gradientUnits,hanging,' +
@@ -254,7 +228,7 @@ var validAttrs = angular.extend({},
                                 svgAttrs,
                                 htmlAttrs);
 
-function makeMap(str, lowercaseKeys) {
+function toMap(str, lowercaseKeys) {
   var obj = {}, items = str.split(','), i;
   for (i = 0; i < items.length; i++) {
     obj[lowercaseKeys ? angular.lowercase(items[i]) : items[i]] = true;
@@ -262,11 +236,34 @@ function makeMap(str, lowercaseKeys) {
   return obj;
 }
 
+var baseNode;
+(function(window) {
+  var doc;
+  if (window.DOMDocument) {
+    doc = new window.DOMDocument();
+  } else if (window.document && window.document.implementation) {
+    doc = window.document.implementation.createHTMLDocument("inert");
+  } else if (window.ActiveXObject) {
+    doc = new window.ActiveXObject("Msxml.DOMDocument");
+  } else {
+    throw $sanitizeMinErr('ddns', "DOMDocument not supported");
+  }
+  var docElement = doc.documentElement || doc.getDocumentElement();
+  var bodyElements = docElement.getElementsByTagName('body');
+  if (bodyElements.length === 1) {
+    baseNode = bodyElements[0];
+  } else {
+    var html = doc.createElement('html');
+    baseNode = doc.createElement('body');
+    html.appendChild(baseNode);
+    doc.appendChild(html);
+  }
+})(window);
 
 /**
  * @example
  * htmlParser(htmlString, {
- *     start: function(tag, attrs, unary) {},
+ *     start: function(tag, attrs) {},
  *     end: function(tag) {},
  *     chars: function(text) {},
  *     comment: function(text) {}
@@ -276,153 +273,60 @@ function makeMap(str, lowercaseKeys) {
  * @param {object} handler
  */
 function htmlParser(html, handler) {
-  if (typeof html !== 'string') {
-    if (html === null || typeof html === 'undefined') {
-      html = '';
-    } else {
-      html = '' + html;
-    }
+  if (html === null || html === undefined) {
+    html = '';
+  } else if (typeof html !== 'string') {
+    html = '' + html;
   }
-  var index, chars, match, stack = [], last = html, text;
-  stack.last = function() { return stack[stack.length - 1]; };
-
-  while (html) {
-    text = '';
-    chars = true;
-
-    // Make sure we're not in a script or style element
-    if (!stack.last() || !specialElements[stack.last()]) {
-
-      // Comment
-      if (html.indexOf("<!--") === 0) {
-        // comments containing -- are not allowed unless they terminate the comment
-        index = html.indexOf("--", 4);
-
-        if (index >= 0 && html.lastIndexOf("-->", index) === index) {
-          if (handler.comment) handler.comment(html.substring(4, index));
-          html = html.substring(index + 3);
-          chars = false;
-        }
-      // DOCTYPE
-      } else if (DOCTYPE_REGEXP.test(html)) {
-        match = html.match(DOCTYPE_REGEXP);
-
-        if (match) {
-          html = html.replace(match[0], '');
-          chars = false;
+  baseNode.innerHTML = html;
+  var node = baseNode.firstChild;
+  while (node) {
+    switch (node.nodeType) {
+      case 1: // ELEMENT_NODE
+        handler.start(node.nodeName.toLowerCase(), attrToMap(node.attributes));
+        break;
+      case 3: // TEXT NODE
+        handler.chars(node.textContent);
+        break;
+      case 8: // COMMENT NODE
+        handler.comment(node.textContent);
+        break;
+    }
+    var nextNode;
+    if (!(nextNode = node.firstChild)) {
+      if (nextNode = node.nextSibling) {
+        if (node.nodeType == 1) {
+          handler.end(node.nodeName.toLowerCase());
         }
-      // end tag
-      } else if (BEGING_END_TAGE_REGEXP.test(html)) {
-        match = html.match(END_TAG_REGEXP);
-
-        if (match) {
-          html = html.substring(match[0].length);
-          match[0].replace(END_TAG_REGEXP, parseEndTag);
-          chars = false;
+      } else {
+        if (node.nodeType == 1) {
+          handler.end(node.nodeName.toLowerCase());
         }
-
-      // start tag
-      } else if (BEGIN_TAG_REGEXP.test(html)) {
-        match = html.match(START_TAG_REGEXP);
-
-        if (match) {
-          // We only have a valid start-tag if there is a '>'.
-          if (match[4]) {
-            html = html.substring(match[0].length);
-            match[0].replace(START_TAG_REGEXP, parseStartTag);
+        while (nextNode == null) {
+          node = node.parentNode;
+          if (node === baseNode) break;
+          nextNode = node.nextSibling;
+          if (node.nodeType == 1) {
+            handler.end(node.nodeName.toLowerCase());
           }
-          chars = false;
-        } else {
-          // no ending tag found --- this piece should be encoded as an entity.
-          text += '<';
-          html = html.substring(1);
         }
       }
-
-      if (chars) {
-        index = html.indexOf("<");
-
-        text += index < 0 ? html : html.substring(0, index);
-        html = index < 0 ? "" : html.substring(index);
-
-        if (handler.chars) handler.chars(decodeEntities(text));
-      }
-
-    } else {
-      // IE versions 9 and 10 do not understand the regex '[^]', so using a workaround with [\W\w].
-      html = html.replace(new RegExp("([\\W\\w]*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'),
-        function(all, text) {
-          text = text.replace(COMMENT_REGEXP, "$1").replace(CDATA_REGEXP, "$1");
-
-          if (handler.chars) handler.chars(decodeEntities(text));
-
-          return "";
-      });
-
-      parseEndTag("", stack.last());
     }
-
-    if (html == last) {
-      throw $sanitizeMinErr('badparse', "The sanitizer was unable to parse the following block " +
-                                        "of html: {0}", html);
-    }
-    last = html;
+    node = nextNode;
   }
 
-  // Clean up any remaining tags
-  parseEndTag();
-
-  function parseStartTag(tag, tagName, rest, unary) {
-    tagName = angular.lowercase(tagName);
-    if (blockElements[tagName]) {
-      while (stack.last() && inlineElements[stack.last()]) {
-        parseEndTag("", stack.last());
-      }
-    }
-
-    if (optionalEndTagElements[tagName] && stack.last() == tagName) {
-      parseEndTag("", tagName);
-    }
-
-    unary = voidElements[tagName] || !!unary;
-
-    if (!unary) {
-      stack.push(tagName);
-    }
-
-    var attrs = {};
-
-    rest.replace(ATTR_REGEXP,
-      function(match, name, doubleQuotedValue, singleQuotedValue, unquotedValue) {
-        var value = doubleQuotedValue
-          || singleQuotedValue
-          || unquotedValue
-          || '';
-
-        attrs[name] = decodeEntities(value);
-    });
-    if (handler.start) handler.start(tagName, attrs, unary);
+  while (node = baseNode.firstChild) {
+    baseNode.removeChild(node);
   }
+}
 
-  function parseEndTag(tag, tagName) {
-    var pos = 0, i;
-    tagName = angular.lowercase(tagName);
-    if (tagName) {
-      // Find the closest opened tag of the same type
-      for (pos = stack.length - 1; pos >= 0; pos--) {
-        if (stack[pos] == tagName) break;
-      }
-    }
-
-    if (pos >= 0) {
-      // Close all the open elements, up the stack
-      for (i = stack.length - 1; i >= pos; i--)
-        if (handler.end) handler.end(stack[i]);
-
-      // Remove the open elements from the stack
-      stack.length = pos;
-    }
+function attrToMap(attrs) {
+  var map = {};
+  for (var i = 0, ii = attrs.length; i < ii; i++) {
+    var attr = attrs[i];
+    map[attr.name] = attr.value;
   }
+  return map;
 }
 
 var hiddenPre=document.createElement("pre");
@@ -466,7 +370,7 @@ function encodeEntities(value) {
  * create an HTML/XML writer which writes to buffer
  * @param {Array} buf use buf.jain('') to get out sanitized html string
  * @returns {object} in the form of {
- *     start: function(tag, attrs, unary) {},
+ *     start: function(tag, attrs) {},
  *     end: function(tag) {},
  *     chars: function(text) {},
  *     comment: function(text) {}
@@ -476,7 +380,7 @@ function htmlSanitizeWriter(buf, uriValidator) {
   var ignore = false;
   var out = angular.bind(buf, buf.push);
   return {
-    start: function(tag, attrs, unary) {
+    start: function(tag, attrs) {
       tag = angular.lowercase(tag);
       if (!ignore && specialElements[tag]) {
         ignore = tag;
@@ -496,25 +400,25 @@ function htmlSanitizeWriter(buf, uriValidator) {
             out('"');
           }
         });
-        out(unary ? '/>' : '>');
+        out('>');
       }
     },
     end: function(tag) {
-        tag = angular.lowercase(tag);
-        if (!ignore && validElements[tag] === true) {
-          out('</');
-          out(tag);
-          out('>');
-        }
-        if (tag == ignore) {
-          ignore = false;
-        }
-      },
+      tag = angular.lowercase(tag);
+      if (!ignore && validElements[tag] === true) {
+        out('</');
+        out(tag);
+        out('>');
+      }
+      if (tag == ignore) {
+        ignore = false;
+      }
+    },
     chars: function(chars) {
-        if (!ignore) {
-          out(encodeEntities(chars));
-        }
+      if (!ignore) {
+        out(encodeEntities(chars));
       }
+    }
   };
 }
 
diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js
index 33d036c97efc..068991e72282 100644
--- a/test/ngSanitize/sanitizeSpec.js
+++ b/test/ngSanitize/sanitizeSpec.js
@@ -22,18 +22,21 @@ describe('HTML', function() {
     var handler, start, text, comment;
     beforeEach(function() {
       text = "";
+      start = null;
       handler = {
-        start: function(tag, attrs, unary) {
+        start: function(tag, attrs) {
           start = {
             tag: tag,
-            attrs: attrs,
-            unary: unary
+            attrs: attrs
           };
           // Since different browsers handle newlines differently we trim
           // so that it is easier to write tests.
-          angular.forEach(attrs, function(value, key) {
+          for (var i = 0, ii = attrs.length; i < ii; i++) {
+            var keyValue = attrs[i];
+            var key = keyValue.key;
+            var value = keyValue.value;
             attrs[key] = value.replace(/^\s*/, '').replace(/\s*$/, '');
-          });
+          }
         },
         chars: function(text_) {
           text += text_;
@@ -52,33 +55,9 @@ describe('HTML', function() {
       expect(comment).toEqual('FOOBAR');
     });
 
-    it('should throw an exception for invalid comments', function() {
-      var caught=false;
-      try {
-        htmlParser('<!-->', handler);
-      }
-      catch (ex) {
-        caught = true;
-        // expected an exception due to a bad parse
-      }
-      expect(caught).toBe(true);
-    });
-
-    it('double-dashes are not allowed in a comment', function() {
-      var caught=false;
-      try {
-        htmlParser('<!-- -- -->', handler);
-      }
-      catch (ex) {
-        caught = true;
-        // expected an exception due to a bad parse
-      }
-      expect(caught).toBe(true);
-    });
-
     it('should parse basic format', function() {
       htmlParser('<tag attr="value">text</tag>', handler);
-      expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
+      expect(start).toEqual({tag:'tag', attrs:{attr:'value'}});
       expect(text).toEqual('text');
     });
 
@@ -88,15 +67,15 @@ describe('HTML', function() {
     });
 
     it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() {
-      expect(function() {
-        htmlParser('foo </ bar', handler);
-      }).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: </ bar');
+      htmlParser('foo </ bar', handler);
+      expect(start).toEqual(undefined);
+      expect(text).toEqual('foo ');
     });
 
     it('should throw badparse if text content contains "<" followed by an ASCII letter without matching ">"', function() {
-      expect(function() {
-        htmlParser('foo <a bar', handler);
-      }).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: <a bar');
+      htmlParser('foo <a bar', handler);
+      expect(start).toEqual(undefined);
+      expect(text).toEqual('foo ');
     });
 
     it('should accept tag delimiters such as "<" inside real tags', function() {
@@ -107,25 +86,25 @@ describe('HTML', function() {
 
     it('should parse newlines in tags', function() {
       htmlParser('<tag\n attr="value"\n>text</\ntag\n>', handler);
-      expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
+      expect(start).toEqual({tag:'tag', attrs:{attr:'value'}});
       expect(text).toEqual('text');
     });
 
     it('should parse newlines in attributes', function() {
       htmlParser('<tag attr="\nvalue\n">text</tag>', handler);
-      expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
+      expect(start).toEqual({tag:'tag', attrs:{attr:'\nvalue\n'}});
       expect(text).toEqual('text');
     });
 
     it('should parse namespace', function() {
       htmlParser('<ns:t-a-g ns:a-t-t-r="\nvalue\n">text</ns:t-a-g>', handler);
-      expect(start).toEqual({tag:'ns:t-a-g', attrs:{'ns:a-t-t-r':'value'}, unary:false});
+      expect(start).toEqual({tag:'ns:t-a-g', attrs:{'ns:a-t-t-r':'\nvalue\n'}});
       expect(text).toEqual('text');
     });
 
     it('should parse empty value attribute of node', function() {
       htmlParser('<OPTION selected value="">abc</OPTION>', handler);
-      expect(start).toEqual({tag:'option', attrs:{selected:'', value:''}, unary:false});
+      expect(start).toEqual({tag:'option', attrs:{selected:'', value:''}});
       expect(text).toEqual('abc');
     });
   });
@@ -137,11 +116,12 @@ describe('HTML', function() {
   });
 
   it('should remove script', function() {
-    expectHTML('a<SCRIPT>evil< / scrIpt >c.').toEqual('ac.');
+    expectHTML('a<SCRIPT>evil< / scrIpt >c.').toEqual('a');
+    expectHTML('a<SCRIPT>evil</scrIpt>c.').toEqual('ac.');
   });
 
   it('should remove script that has newline characters', function() {
-    expectHTML('a<SCRIPT\n>\n\revil\n\r< / scrIpt\n >c.').toEqual('ac.');
+    expectHTML('a<SCRIPT\n>\n\revil\n\r</scrIpt\n >c.').toEqual('ac.');
   });
 
   it('should remove DOCTYPE header', function() {
@@ -173,7 +153,7 @@ describe('HTML', function() {
   });
 
   it('should remove double nested script', function() {
-    expectHTML('a<SCRIPT>ev<script>evil</sCript>il</scrIpt>c.').toEqual('ac.');
+    expectHTML('a<SCRIPT>ev<script>evil</sCript>il</scrIpt>c.').toEqual('ailc.');
   });
 
   it('should remove unknown  names', function() {
@@ -185,7 +165,7 @@ describe('HTML', function() {
   });
 
   it('should handle self closed elements', function() {
-    expectHTML('a<hr/>c').toEqual('a<hr/>c');
+    expectHTML('a<hr/>c').toEqual('a<hr></hr>c');
   });
 
   it('should handle namespace', function() {
@@ -212,7 +192,7 @@ describe('HTML', function() {
 
   it('should ignore back slash as escape', function() {
     expectHTML('<img alt="xxx\\" title="><script>....">').
-      toEqual('<img alt="xxx\\" title="&gt;&lt;script&gt;...."/>');
+      toEqual('<img alt="xxx\\" title="&gt;&lt;script&gt;...."></img>');
   });
 
   it('should ignore object attributes', function() {
@@ -247,8 +227,8 @@ describe('HTML', function() {
   });
 
   it('should accept SVG tags', function() {
-    expectHTML('<svg width="400px" height="150px" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"/></svg>')
-        .toEqual('<svg width="400px" height="150px" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"/></svg>');
+      expectHTML('<svg width="400px" height="150px" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"></svg>')
+        .toEqual('<svg width="400px" height="150px" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"></circle></svg>');
   });
 
   it('should not ignore white-listed svg camelCased attributes', function() {
@@ -435,11 +415,11 @@ describe('HTML', function() {
       inject(function() {
         $$sanitizeUri.andReturn('someUri');
 
-        expectHTML('<img src="someUri"/>').toEqual('<img src="someUri"/>');
+        expectHTML('<img src="someUri"/>').toEqual('<img src="someUri"></img>');
         expect($$sanitizeUri).toHaveBeenCalledWith('someUri', true);
 
         $$sanitizeUri.andReturn('unsafe:someUri');
-        expectHTML('<img src="someUri"/>').toEqual('<img/>');
+        expectHTML('<img src="someUri"/>').toEqual('<img></img>');
       });
     });
 

From 0b39333b150dde8bc9a2ccddff20f27d42a1d574 Mon Sep 17 00:00:00 2001
From: Lucas Galfaso <lgalfaso@gmail.com>
Date: Fri, 31 Jul 2015 22:48:08 +0200
Subject: [PATCH 2/2] fix(sanitize): Do not duplicate self closing elements

For HTML self closing elements:
* Do not create an end element
* Create the element as self closed
---
 src/ngSanitize/sanitize.js          | 21 +++++++++++++--------
 test/ngSanitize/filter/linkySpec.js |  4 ++--
 test/ngSanitize/sanitizeSpec.js     | 18 ++++++++++--------
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index 0d7011a822c7..a102b4108ee6 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -189,6 +189,9 @@ var svgElements = toMap("circle,defs,desc,ellipse,font-face,font-face-name,font-
 // Special Elements (can contain anything)
 var specialElements = toMap("script,style");
 
+var selfClosingElements = toMap("area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source," +
+        "track,wbr");
+
 var validElements = angular.extend({},
                                    voidElements,
                                    blockElements,
@@ -283,7 +286,8 @@ function htmlParser(html, handler) {
   while (node) {
     switch (node.nodeType) {
       case 1: // ELEMENT_NODE
-        handler.start(node.nodeName.toLowerCase(), attrToMap(node.attributes));
+        handler.start(node.nodeName.toLowerCase(), attrToMap(node.attributes),
+                      selfClosingElements[node.nodeName.toLowerCase()]);
         break;
       case 3: // TEXT NODE
         handler.chars(node.textContent);
@@ -295,18 +299,18 @@ function htmlParser(html, handler) {
     var nextNode;
     if (!(nextNode = node.firstChild)) {
       if (nextNode = node.nextSibling) {
-        if (node.nodeType == 1) {
+        if (node.nodeType == 1 && !selfClosingElements[node.nodeName.toLowerCase()]) {
           handler.end(node.nodeName.toLowerCase());
         }
       } else {
-        if (node.nodeType == 1) {
+        if (node.nodeType == 1 && !selfClosingElements[node.nodeName.toLowerCase()]) {
           handler.end(node.nodeName.toLowerCase());
         }
         while (nextNode == null) {
           node = node.parentNode;
           if (node === baseNode) break;
           nextNode = node.nextSibling;
-          if (node.nodeType == 1) {
+          if (node.nodeType == 1 && !selfClosingElements[node.nodeName.toLowerCase()]) {
             handler.end(node.nodeName.toLowerCase());
           }
         }
@@ -370,7 +374,7 @@ function encodeEntities(value) {
  * create an HTML/XML writer which writes to buffer
  * @param {Array} buf use buf.jain('') to get out sanitized html string
  * @returns {object} in the form of {
- *     start: function(tag, attrs) {},
+ *     start: function(tag, attrs, selfClose) {},
  *     end: function(tag) {},
  *     chars: function(text) {},
  *     comment: function(text) {}
@@ -380,7 +384,7 @@ function htmlSanitizeWriter(buf, uriValidator) {
   var ignore = false;
   var out = angular.bind(buf, buf.push);
   return {
-    start: function(tag, attrs) {
+    start: function(tag, attrs, selfClose) {
       tag = angular.lowercase(tag);
       if (!ignore && specialElements[tag]) {
         ignore = tag;
@@ -388,7 +392,8 @@ function htmlSanitizeWriter(buf, uriValidator) {
       if (!ignore && validElements[tag] === true) {
         out('<');
         out(tag);
-        angular.forEach(attrs, function(value, key) {
+        angular.forEach(Object.keys(attrs).sort(), function(key) {
+          var value = attrs[key];
           var lkey=angular.lowercase(key);
           var isImage = (tag === 'img' && lkey === 'src') || (lkey === 'background');
           if (validAttrs[lkey] === true &&
@@ -400,7 +405,7 @@ function htmlSanitizeWriter(buf, uriValidator) {
             out('"');
           }
         });
-        out('>');
+        out(selfClose ? '/>' : '>');
       }
     },
     end: function(tag) {
diff --git a/test/ngSanitize/filter/linkySpec.js b/test/ngSanitize/filter/linkySpec.js
index f0c4f3956e54..74cdd25596e7 100644
--- a/test/ngSanitize/filter/linkySpec.js
+++ b/test/ngSanitize/filter/linkySpec.js
@@ -50,8 +50,8 @@ describe('linky', function() {
 
   it('should handle target:', function() {
     expect(linky("http://example.com", "_blank")).
-      toEqual('<a target="_blank" href="http://example.com">http://example.com</a>');
+      toEqual('<a href="http://example.com" target="_blank">http://example.com</a>');
     expect(linky("http://example.com", "someNamedIFrame")).
-      toEqual('<a target="someNamedIFrame" href="http://example.com">http://example.com</a>');
+      toEqual('<a href="http://example.com" target="someNamedIFrame">http://example.com</a>');
   });
 });
diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js
index 068991e72282..8490bf828b4b 100644
--- a/test/ngSanitize/sanitizeSpec.js
+++ b/test/ngSanitize/sanitizeSpec.js
@@ -112,7 +112,7 @@ describe('HTML', function() {
   // THESE TESTS ARE EXECUTED WITH COMPILED ANGULAR
   it('should echo html', function() {
     expectHTML('hello<b class="1\'23" align=\'""\'>world</b>.').
-       toEqual('hello<b class="1\'23" align="&#34;&#34;">world</b>.');
+       toEqual('hello<b align="&#34;&#34;" class="1\'23">world</b>.');
   });
 
   it('should remove script', function() {
@@ -165,7 +165,9 @@ describe('HTML', function() {
   });
 
   it('should handle self closed elements', function() {
-    expectHTML('a<hr/>c').toEqual('a<hr></hr>c');
+    expectHTML('a<hr/>c').toEqual('a<hr/>c');
+    expectHTML('<br/>').toEqual('<br/>');
+    expectHTML('<br>').toEqual('<br/>');
   });
 
   it('should handle namespace', function() {
@@ -192,7 +194,7 @@ describe('HTML', function() {
 
   it('should ignore back slash as escape', function() {
     expectHTML('<img alt="xxx\\" title="><script>....">').
-      toEqual('<img alt="xxx\\" title="&gt;&lt;script&gt;...."></img>');
+      toEqual('<img alt="xxx\\" title="&gt;&lt;script&gt;...."/>');
   });
 
   it('should ignore object attributes', function() {
@@ -227,8 +229,8 @@ describe('HTML', function() {
   });
 
   it('should accept SVG tags', function() {
-      expectHTML('<svg width="400px" height="150px" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"></svg>')
-        .toEqual('<svg width="400px" height="150px" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"></circle></svg>');
+    expectHTML('<svg width="400px" height="150px" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"></svg>')
+        .toEqual('<svg height="150px" width="400px" xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" fill="red" r="40" stroke="black" stroke-width="3"></circle></svg>');
   });
 
   it('should not ignore white-listed svg camelCased attributes', function() {
@@ -259,7 +261,7 @@ describe('HTML', function() {
 
     expectHTML('<svg><a xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="?"><circle r="400"></circle>' +
     '<animate attributeName="xlink:href" begin="0" from="javascript:alert(1)" to="&" /></a></svg>')
-      .toEqual('<svg><a xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="?"><circle r="400"></circle></a></svg>');
+      .toEqual('<svg><a xlink:href="?" xmlns:xlink="http://www.w3.org/1999/xlink"><circle r="400"></circle></a></svg>');
   });
 
   describe('htmlSanitizerWriter', function() {
@@ -415,11 +417,11 @@ describe('HTML', function() {
       inject(function() {
         $$sanitizeUri.andReturn('someUri');
 
-        expectHTML('<img src="someUri"/>').toEqual('<img src="someUri"></img>');
+        expectHTML('<img src="someUri"/>').toEqual('<img src="someUri"/>');
         expect($$sanitizeUri).toHaveBeenCalledWith('someUri', true);
 
         $$sanitizeUri.andReturn('unsafe:someUri');
-        expectHTML('<img src="someUri"/>').toEqual('<img></img>');
+        expectHTML('<img src="someUri"/>').toEqual('<img/>');
       });
     });