From 8781c72134c303f9b98458ac20f12c843a7b8784 Mon Sep 17 00:00:00 2001 From: Sylvain Hamel Date: Sat, 5 Jul 2014 01:03:52 -0400 Subject: [PATCH 1/2] fix: text that looks like an html tag but is not causes [$sanitize:badparse] error --- src/ngSanitize/sanitize.js | 4 ++++ test/ngSanitize/sanitizeSpec.js | 19 ++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index fae50aac259d..593e62f59ef1 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -277,6 +277,10 @@ function htmlParser( html, handler ) { html = html.substring( match[0].length ); match[0].replace( START_TAG_REGEXP, parseStartTag ); chars = false; + } else { + // no ending tag found + if (handler.chars) handler.chars( '<' ); + html = html.substring(1); } } diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index 76a48087576e..611d9664cc73 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -21,6 +21,7 @@ describe('HTML', function() { var handler, start, text, comment; beforeEach(function() { + text = ""; handler = { start: function(tag, attrs, unary){ start = { @@ -35,7 +36,7 @@ describe('HTML', function() { }); }, chars: function(text_){ - text = text_; + text += text_; }, end:function(tag) { expect(tag).toEqual(start.tag); @@ -81,6 +82,16 @@ describe('HTML', function() { expect(text).toEqual('text'); }); + it('should parse unterminated tags as regular content', function() { + htmlParser(' 10 < 100

', handler); + expect(text).toEqual(' 10 < 100 '); + }); + it('should parse newlines in tags', function() { htmlParser('<\ntag\n attr="value"\n>text<\n/\ntag\n>', handler); expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); @@ -195,6 +206,12 @@ describe('HTML', function() { expectHTML('\na\n').toEqual(' a '); }); + it('should accept tag delimiters such as "<" inside real tags (with nesting)', function() { + //this is an integrated version of the 'should accept tag delimiters such as "<" inside real tags' test + expectHTML('

10 < 100

') + .toEqual('

10 < 100

'); + }); + describe('htmlSanitizerWriter', function() { /* global htmlSanitizeWriter: false */ if (angular.isUndefined(window.htmlSanitizeWriter)) return; From 5eee03c200eefb85447f521c0be74a8ed4b2e916 Mon Sep 17 00:00:00 2001 From: Caitlin Potter Date: Tue, 15 Jul 2014 13:06:49 -0400 Subject: [PATCH 2/2] fix(ngSanitize): follow HTML parser rules for start tags / allow < in text content ngSanitize will now permit opening braces in text content, provided they are not followed by either an unescaped backslash, or by an ASCII letter (u+0041 - u+005A, u+0061 - u+007A), in compliance with rules of the parsing spec, without taking insertion mode into account. BREAKING CHANGE Previously, $sanitize would "fix" invalid markup in which a space preceded alphanumeric characters in a start-tag. Following this change, any opening angle bracket which is not followed by either a forward slash, or by an ASCII letter (a-z | A-Z) will not be considered a start tag delimiter, per the HTML parsing spec (http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html). --- src/ngSanitize/sanitize.js | 22 +++++++++++--------- test/ngSanitize/sanitizeSpec.js | 36 ++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index 593e62f59ef1..de6dd3dd135d 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -154,11 +154,11 @@ function sanitizeText(chars) { // Regular Expressions for parsing tags and attributes var START_TAG_REGEXP = - /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/, - END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/, + /^<((?:[a-zA-Z])[\w:-]*)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*(>?)/, + END_TAG_REGEXP = /^<\/\s*([\w:-]+)[^>]*>/, ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g, BEGIN_TAG_REGEXP = /^/g, DOCTYPE_REGEXP = /]*?)>/i, CDATA_REGEXP = //g, @@ -232,10 +232,11 @@ function makeMap(str) { * @param {object} handler */ function htmlParser( html, handler ) { - var index, chars, match, stack = [], last = html; + var index, chars, match, stack = [], last = html, text; stack.last = function() { return stack[ stack.length - 1 ]; }; while ( html ) { + text = ''; chars = true; // Make sure we're not in a script or style element @@ -274,12 +275,15 @@ function htmlParser( html, handler ) { match = html.match( START_TAG_REGEXP ); if ( match ) { - html = html.substring( match[0].length ); - match[0].replace( START_TAG_REGEXP, parseStartTag ); + // We only have a valid start-tag if there is a '>'. + if ( match[4] ) { + html = html.substring( match[0].length ); + match[0].replace( START_TAG_REGEXP, parseStartTag ); + } chars = false; } else { - // no ending tag found - if (handler.chars) handler.chars( '<' ); + // no ending tag found --- this piece should be encoded as an entity. + text += '<'; html = html.substring(1); } } @@ -287,7 +291,7 @@ function htmlParser( html, handler ) { if ( chars ) { index = html.indexOf("<"); - var text = index < 0 ? html : html.substring( 0, index ); + text += index < 0 ? html : html.substring( 0, index ); html = index < 0 ? "" : html.substring( index ); if (handler.chars) handler.chars( decodeEntities(text) ); diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index 611d9664cc73..5f2594397657 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -82,18 +82,31 @@ describe('HTML', function() { expect(text).toEqual('text'); }); - it('should parse unterminated tags as regular content', function() { - htmlParser('
"', function() { + expect(function() { + htmlParser('foo "', function() { + expect(function() { + htmlParser('foo 10 < 100

', handler); expect(text).toEqual(' 10 < 100 '); }); it('should parse newlines in tags', function() { - htmlParser('<\ntag\n attr="value"\n>text<\n/\ntag\n>', handler); + htmlParser('text', handler); expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); expect(text).toEqual('text'); }); @@ -134,8 +147,9 @@ describe('HTML', function() { expectHTML('ac.').toEqual('ac.'); }); - it('should remove nested script', function() { - expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.'); + it('should escape non-start tags', function() { + expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.'). + toBe('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.'); }); it('should remove attrs', function() { @@ -176,14 +190,16 @@ describe('HTML', function() { expectHTML(everything).toEqual(everything); }); - it('should handle improper html', function() { + it('should mangle improper html', function() { + // This text is encoded more than a real HTML parser would, but it should render the same. expectHTML('< div rel="" alt=abc dir=\'"\' >text< /div>'). - toEqual('
text
'); + toBe('< div rel="" alt=abc dir=\'"\' >text< /div>'); }); - it('should handle improper html2', function() { + it('should mangle improper html2', function() { + // A proper HTML parser would clobber this more in most cases, but it looks reasonable. expectHTML('< div rel="" / >'). - toEqual('
'); + toBe('< div rel="" / >'); }); it('should ignore back slash as escape', function() {