From 8781c72134c303f9b98458ac20f12c843a7b8784 Mon Sep 17 00:00:00 2001
From: Sylvain Hamel <sylvainhamel0@gmail.com>
Date: Sat, 5 Jul 2014 01:03:52 -0400
Subject: [PATCH 1/2] fix: text that looks like an html tag but is not causes
 [$sanitize:badparse] error

---
 src/ngSanitize/sanitize.js      |  4 ++++
 test/ngSanitize/sanitizeSpec.js | 19 ++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index fae50aac259d..593e62f59ef1 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -277,6 +277,10 @@ function htmlParser( html, handler ) {
           html = html.substring( match[0].length );
           match[0].replace( START_TAG_REGEXP, parseStartTag );
           chars = false;
+        } else {
+          // no ending tag found
+          if (handler.chars) handler.chars( '<' );
+          html = html.substring(1);
         }
       }
 
diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js
index 76a48087576e..611d9664cc73 100644
--- a/test/ngSanitize/sanitizeSpec.js
+++ b/test/ngSanitize/sanitizeSpec.js
@@ -21,6 +21,7 @@ describe('HTML', function() {
 
     var handler, start, text, comment;
     beforeEach(function() {
+      text = "";
       handler = {
         start: function(tag, attrs, unary){
           start = {
@@ -35,7 +36,7 @@ describe('HTML', function() {
           });
         },
         chars: function(text_){
-          text = text_;
+          text += text_;
         },
         end:function(tag) {
           expect(tag).toEqual(start.tag);
@@ -81,6 +82,16 @@ describe('HTML', function() {
       expect(text).toEqual('text');
     });
 
+    it('should parse unterminated tags as regular content', function() {
+      htmlParser('<a text1 text2 <a text1 text2', handler);
+      expect(text).toEqual('<a text1 text2 <a text1 text2');
+    });
+
+    it('should accept tag delimiters such as "<" inside real tags', function() {
+      htmlParser('<p> 10 < 100 </p>', handler);
+      expect(text).toEqual(' 10 < 100 ');
+    });
+
     it('should parse newlines in tags', function() {
       htmlParser('<\ntag\n attr="value"\n>text<\n/\ntag\n>', handler);
       expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
@@ -195,6 +206,12 @@ describe('HTML', function() {
     expectHTML('\na\n').toEqual('&#10;a&#10;');
   });
 
+  it('should accept tag delimiters such as "<" inside real tags (with nesting)', function() {
+    //this is an integrated version of the 'should accept tag delimiters such as "<" inside real tags' test
+    expectHTML('<p> 10 < <span>100</span> </p>')
+    .toEqual('<p> 10 &lt; <span>100</span> </p>');
+  });
+
   describe('htmlSanitizerWriter', function() {
     /* global htmlSanitizeWriter: false */
     if (angular.isUndefined(window.htmlSanitizeWriter)) return;

From 5eee03c200eefb85447f521c0be74a8ed4b2e916 Mon Sep 17 00:00:00 2001
From: Caitlin Potter <caitpotter88@gmail.com>
Date: Tue, 15 Jul 2014 13:06:49 -0400
Subject: [PATCH 2/2] fix(ngSanitize): follow HTML parser rules for start tags
 / allow < in text content

ngSanitize will now permit opening braces in text content, provided they are not followed by either
an unescaped backslash, or by an ASCII letter (u+0041 - u+005A, u+0061 - u+007A), in compliance with
rules of the parsing spec, without taking insertion mode into account.

BREAKING CHANGE

Previously, $sanitize would "fix" invalid markup in which a space preceded alphanumeric characters
in a start-tag. Following this change, any opening angle bracket which is not followed by either a
forward slash, or by an ASCII letter (a-z | A-Z) will not be considered a start tag delimiter, per
the HTML parsing spec (http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html).
---
 src/ngSanitize/sanitize.js      | 22 +++++++++++---------
 test/ngSanitize/sanitizeSpec.js | 36 ++++++++++++++++++++++++---------
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index 593e62f59ef1..de6dd3dd135d 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -154,11 +154,11 @@ function sanitizeText(chars) {
 
 // Regular Expressions for parsing tags and attributes
 var START_TAG_REGEXP =
-       /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
-  END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
+       /^<((?:[a-zA-Z])[\w:-]*)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*(>?)/,
+  END_TAG_REGEXP = /^<\/\s*([\w:-]+)[^>]*>/,
   ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
   BEGIN_TAG_REGEXP = /^</,
-  BEGING_END_TAGE_REGEXP = /^<\s*\//,
+  BEGING_END_TAGE_REGEXP = /^<\//,
   COMMENT_REGEXP = /<!--(.*?)-->/g,
   DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
   CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
@@ -232,10 +232,11 @@ function makeMap(str) {
  * @param {object} handler
  */
 function htmlParser( html, handler ) {
-  var index, chars, match, stack = [], last = html;
+  var index, chars, match, stack = [], last = html, text;
   stack.last = function() { return stack[ stack.length - 1 ]; };
 
   while ( html ) {
+    text = '';
     chars = true;
 
     // Make sure we're not in a script or style element
@@ -274,12 +275,15 @@ function htmlParser( html, handler ) {
         match = html.match( START_TAG_REGEXP );
 
         if ( match ) {
-          html = html.substring( match[0].length );
-          match[0].replace( START_TAG_REGEXP, parseStartTag );
+          // We only have a valid start-tag if there is a '>'.
+          if ( match[4] ) {
+            html = html.substring( match[0].length );
+            match[0].replace( START_TAG_REGEXP, parseStartTag );
+          }
           chars = false;
         } else {
-          // no ending tag found
-          if (handler.chars) handler.chars( '<' );
+          // no ending tag found --- this piece should be encoded as an entity.
+          text += '<';
           html = html.substring(1);
         }
       }
@@ -287,7 +291,7 @@ function htmlParser( html, handler ) {
       if ( chars ) {
         index = html.indexOf("<");
 
-        var text = index < 0 ? html : html.substring( 0, index );
+        text += index < 0 ? html : html.substring( 0, index );
         html = index < 0 ? "" : html.substring( index );
 
         if (handler.chars) handler.chars( decodeEntities(text) );
diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js
index 611d9664cc73..5f2594397657 100644
--- a/test/ngSanitize/sanitizeSpec.js
+++ b/test/ngSanitize/sanitizeSpec.js
@@ -82,18 +82,31 @@ describe('HTML', function() {
       expect(text).toEqual('text');
     });
 
-    it('should parse unterminated tags as regular content', function() {
-      htmlParser('<a text1 text2 <a text1 text2', handler);
-      expect(text).toEqual('<a text1 text2 <a text1 text2');
+    it('should not treat "<" followed by a non-/ or non-letter as a tag', function() {
+      expectHTML('<- text1 text2 <1 text1 text2 <{', handler).
+        toBe('&lt;- text1 text2 &lt;1 text1 text2 &lt;{');
+    });
+
+    it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() {
+      expect(function() {
+        htmlParser('foo </ bar', handler);
+      }).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: </ bar');
+    });
+
+    it('should throw badparse if text content contains "<" followed by an ASCII letter without matching ">"', function() {
+      expect(function() {
+        htmlParser('foo <a bar', handler);
+      }).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: <a bar');
     });
 
     it('should accept tag delimiters such as "<" inside real tags', function() {
+      // Assert that the < is part of the text node content, and not part of a tag name.
       htmlParser('<p> 10 < 100 </p>', handler);
       expect(text).toEqual(' 10 < 100 ');
     });
 
     it('should parse newlines in tags', function() {
-      htmlParser('<\ntag\n attr="value"\n>text<\n/\ntag\n>', handler);
+      htmlParser('<tag\n attr="value"\n>text</\ntag\n>', handler);
       expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
       expect(text).toEqual('text');
     });
@@ -134,8 +147,9 @@ describe('HTML', function() {
     expectHTML('a<!DocTyPe html>c.').toEqual('ac.');
   });
 
-  it('should remove nested script', function() {
-    expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.');
+  it('should escape non-start tags', function() {
+    expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').
+      toBe('a&lt; SCRIPT &gt;A&lt; SCRIPT &gt;evil&lt; / scrIpt &gt;B&lt; / scrIpt &gt;c.');
   });
 
   it('should remove attrs', function() {
@@ -176,14 +190,16 @@ describe('HTML', function() {
     expectHTML(everything).toEqual(everything);
   });
 
-  it('should handle improper html', function() {
+  it('should mangle improper html', function() {
+    // This text is encoded more than a real HTML parser would, but it should render the same.
     expectHTML('< div rel="</div>" alt=abc dir=\'"\' >text< /div>').
-      toEqual('<div rel="&lt;/div&gt;" alt="abc" dir="&#34;">text</div>');
+      toBe('&lt; div rel=&#34;&#34; alt=abc dir=\'&#34;\' &gt;text&lt; /div&gt;');
   });
 
-  it('should handle improper html2', function() {
+  it('should mangle improper html2', function() {
+    // A proper HTML parser would clobber this more in most cases, but it looks reasonable.
     expectHTML('< div rel="</div>" / >').
-      toEqual('<div rel="&lt;/div&gt;"/>');
+      toBe('&lt; div rel=&#34;&#34; / &gt;');
   });
 
   it('should ignore back slash as escape', function() {