From 3dae51e74fb4fcd9272174a8d692d9a929e4fe71 Mon Sep 17 00:00:00 2001 From: paolo-delmundo Date: Sat, 7 Sep 2013 22:43:06 -0400 Subject: [PATCH 1/5] Added case for doctype Added spec --- src/ngSanitize/sanitize.js | 14 ++++++++++++++ test/ngSanitize/sanitizeSpec.js | 9 ++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index 3d904ad1e0bb..e143c4a8908a 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -135,6 +135,7 @@ var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?: BEGIN_TAG_REGEXP = /^/g, + DOCTYPE_REGEXP = //g, CDATA_REGEXP = //g, URI_REGEXP = /^((ftp|https?):\/\/|mailto:|tel:|#)/i, NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character) @@ -199,10 +200,13 @@ function makeMap(str) { * @param {object} handler */ function htmlParser( html, handler ) { + var index, chars, match, stack = [], last = html; stack.last = function() { return stack[ stack.length - 1 ]; }; while ( html ) { + + console.log(html); chars = true; // Make sure we're not in a script or style element @@ -217,7 +221,14 @@ function htmlParser( html, handler ) { html = html.substring( index + 3 ); chars = false; } + // DOCTYPE + } else if (html.indexOf(""); + if (index >= 0) { + html = html.substring(index); + chars = false; + } // end tag } else if ( BEGING_END_TAGE_REGEXP.test(html) ) { match = html.match( END_TAG_REGEXP ); @@ -262,6 +273,9 @@ function htmlParser( html, handler ) { parseEndTag( "", stack.last() ); } + console.log('at end (html): ', html); + console.log('at end (last): ', last); + if ( html == last ) { throw $sanitizeMinErr('badparse', "The sanitizer was unable to parse the following block of html: {0}", html); } diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index f97e86a63768..b1cfafa2af01 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -24,7 +24,7 @@ describe('HTML', function() { attrs: attrs, unary: unary }; - // Since different browsers handle newlines differenttly we trim + // Since different browsers handle newlines differently we trim // so that it is easier to write tests. angular.forEach(attrs, function(value, key) { attrs[key] = value.replace(/^\s*/, '').replace(/\s*$/, '') @@ -39,6 +39,12 @@ describe('HTML', function() { }; }); + iit('should parse the doctype', function() { + htmlParser('', handler); + expect(text).toEqual(''); + console.log(start); + }); + it('should parse basic format', function() { htmlParser('text', handler); expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); @@ -288,5 +294,6 @@ describe('HTML', function() { }); }); + }); }); From 5ab4638bcda8a5d6d51f0713595190aca8d9058c Mon Sep 17 00:00:00 2001 From: paolo-delmundo Date: Sun, 8 Sep 2013 01:04:54 -0400 Subject: [PATCH 2/5] Changed to use regex instead of indexOf Removed spec. --- src/ngSanitize/sanitize.js | 15 +++++---------- test/ngSanitize/sanitizeSpec.js | 6 ------ 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index e143c4a8908a..3a5f9358623b 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -135,7 +135,7 @@ var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?: BEGIN_TAG_REGEXP = /^/g, - DOCTYPE_REGEXP = //g, + DOCTYPE_REGEXP = //i, CDATA_REGEXP = //g, URI_REGEXP = /^((ftp|https?):\/\/|mailto:|tel:|#)/i, NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character) @@ -205,8 +205,6 @@ function htmlParser( html, handler ) { stack.last = function() { return stack[ stack.length - 1 ]; }; while ( html ) { - - console.log(html); chars = true; // Make sure we're not in a script or style element @@ -222,11 +220,11 @@ function htmlParser( html, handler ) { chars = false; } // DOCTYPE - } else if (html.indexOf(""); + } else if ( DOCTYPE_REGEXP.test(html) ) { + match = html.match( DOCTYPE_REGEXP ); - if (index >= 0) { - html = html.substring(index); + if ( match ) { + html = html.substring( match[0].length ); chars = false; } // end tag @@ -273,9 +271,6 @@ function htmlParser( html, handler ) { parseEndTag( "", stack.last() ); } - console.log('at end (html): ', html); - console.log('at end (last): ', last); - if ( html == last ) { throw $sanitizeMinErr('badparse', "The sanitizer was unable to parse the following block of html: {0}", html); } diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index b1cfafa2af01..d0b3e6c05ee6 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -39,12 +39,6 @@ describe('HTML', function() { }; }); - iit('should parse the doctype', function() { - htmlParser('', handler); - expect(text).toEqual(''); - console.log(start); - }); - it('should parse basic format', function() { htmlParser('text', handler); expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); From 855bcfd67f9cf140e30954e35d914a9468944297 Mon Sep 17 00:00:00 2001 From: paolo-delmundo Date: Sun, 8 Sep 2013 22:13:06 -0400 Subject: [PATCH 3/5] Fixed bug with doctype replacement Added unit test to sanitizeSpec.js. --- src/ngSanitize/sanitize.js | 2 +- test/ngSanitize/sanitizeSpec.js | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index 3a5f9358623b..ab45ccbf47c3 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -224,7 +224,7 @@ function htmlParser( html, handler ) { match = html.match( DOCTYPE_REGEXP ); if ( match ) { - html = html.substring( match[0].length ); + html = html.replace( match[0] , ''); chars = false; } // end tag diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index d0b3e6c05ee6..2b8c75744f42 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -80,6 +80,12 @@ describe('HTML', function() { expectHTML('a