Skip to content
This repository was archived by the owner on Apr 12, 2024. It is now read-only.

Commit 25d3d37

Browse files
committed
revert: fix(ngSanitize): follow HTML parser rules for start tags / allow < in text content
This reverts commit 36d2658. The reverted commit broke the ci-checks task when ported into v1.2.x --- I will sort this out shortly.
1 parent af5aacc commit 25d3d37

File tree

2 files changed

+15
-56
lines changed

2 files changed

+15
-56
lines changed

src/ngSanitize/sanitize.js

+7-15
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,11 @@ function sanitizeText(chars) {
154154

155155
// Regular Expressions for parsing tags and attributes
156156
var START_TAG_REGEXP =
157-
/^<((?:[a-zA-Z])[\w:-]*)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*(>?)/,
158-
END_TAG_REGEXP = /^<\/\s*([\w:-]+)[^>]*>/,
157+
/^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
158+
END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
159159
ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
160160
BEGIN_TAG_REGEXP = /^</,
161-
BEGING_END_TAGE_REGEXP = /^<\//,
161+
BEGING_END_TAGE_REGEXP = /^<\s*\//,
162162
COMMENT_REGEXP = /<!--(.*?)-->/g,
163163
DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
164164
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
@@ -232,11 +232,10 @@ function makeMap(str) {
232232
* @param {object} handler
233233
*/
234234
function htmlParser( html, handler ) {
235-
var index, chars, match, stack = [], last = html, text;
235+
var index, chars, match, stack = [], last = html;
236236
stack.last = function() { return stack[ stack.length - 1 ]; };
237237

238238
while ( html ) {
239-
text = '';
240239
chars = true;
241240

242241
// Make sure we're not in a script or style element
@@ -275,23 +274,16 @@ function htmlParser( html, handler ) {
275274
match = html.match( START_TAG_REGEXP );
276275

277276
if ( match ) {
278-
// We only have a valid start-tag if there is a '>'.
279-
if ( match[4] ) {
280-
html = html.substring( match[0].length );
281-
match[0].replace( START_TAG_REGEXP, parseStartTag );
282-
}
277+
html = html.substring( match[0].length );
278+
match[0].replace( START_TAG_REGEXP, parseStartTag );
283279
chars = false;
284-
} else {
285-
// no ending tag found --- this piece should be encoded as an entity.
286-
text += '<';
287-
html = html.substring(1);
288280
}
289281
}
290282

291283
if ( chars ) {
292284
index = html.indexOf("<");
293285

294-
text += index < 0 ? html : html.substring( 0, index );
286+
var text = index < 0 ? html : html.substring( 0, index );
295287
html = index < 0 ? "" : html.substring( index );
296288

297289
if (handler.chars) handler.chars( decodeEntities(text) );

test/ngSanitize/sanitizeSpec.js

+8-41
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ describe('HTML', function() {
2121

2222
var handler, start, text, comment;
2323
beforeEach(function() {
24-
text = "";
2524
handler = {
2625
start: function(tag, attrs, unary){
2726
start = {
@@ -36,7 +35,7 @@ describe('HTML', function() {
3635
});
3736
},
3837
chars: function(text_){
39-
text += text_;
38+
text = text_;
4039
},
4140
end:function(tag) {
4241
expect(tag).toEqual(start.tag);
@@ -82,31 +81,8 @@ describe('HTML', function() {
8281
expect(text).toEqual('text');
8382
});
8483

85-
it('should not treat "<" followed by a non-/ or non-letter as a tag', function() {
86-
expectHTML('<- text1 text2 <1 text1 text2 <{', handler).
87-
toBe('&lt;- text1 text2 &lt;1 text1 text2 &lt;{');
88-
});
89-
90-
it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() {
91-
expect(function() {
92-
htmlParser('foo </ bar', handler);
93-
}).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: </ bar');
94-
});
95-
96-
it('should throw badparse if text content contains "<" followed by an ASCII letter without matching ">"', function() {
97-
expect(function() {
98-
htmlParser('foo <a bar', handler);
99-
}).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: <a bar');
100-
});
101-
102-
it('should accept tag delimiters such as "<" inside real tags', function() {
103-
// Assert that the < is part of the text node content, and not part of a tag name.
104-
htmlParser('<p> 10 < 100 </p>', handler);
105-
expect(text).toEqual(' 10 < 100 ');
106-
});
107-
10884
it('should parse newlines in tags', function() {
109-
htmlParser('<tag\n attr="value"\n>text</\ntag\n>', handler);
85+
htmlParser('<\ntag\n attr="value"\n>text<\n/\ntag\n>', handler);
11086
expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
11187
expect(text).toEqual('text');
11288
});
@@ -147,9 +123,8 @@ describe('HTML', function() {
147123
expectHTML('a<!DocTyPe html>c.').toEqual('ac.');
148124
});
149125

150-
it('should escape non-start tags', function() {
151-
expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').
152-
toBe('a&lt; SCRIPT &gt;A&lt; SCRIPT &gt;evil&lt; / scrIpt &gt;B&lt; / scrIpt &gt;c.');
126+
it('should remove nested script', function() {
127+
expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.');
153128
});
154129

155130
it('should remove attrs', function() {
@@ -190,16 +165,14 @@ describe('HTML', function() {
190165
expectHTML(everything).toEqual(everything);
191166
});
192167

193-
it('should mangle improper html', function() {
194-
// This text is encoded more than a real HTML parser would, but it should render the same.
168+
it('should handle improper html', function() {
195169
expectHTML('< div rel="</div>" alt=abc dir=\'"\' >text< /div>').
196-
toBe('&lt; div rel=&#34;&#34; alt=abc dir=\'&#34;\' &gt;text&lt; /div&gt;');
170+
toEqual('<div rel="&lt;/div&gt;" alt="abc" dir="&#34;">text</div>');
197171
});
198172

199-
it('should mangle improper html2', function() {
200-
// A proper HTML parser would clobber this more in most cases, but it looks reasonable.
173+
it('should handle improper html2', function() {
201174
expectHTML('< div rel="</div>" / >').
202-
toBe('&lt; div rel=&#34;&#34; / &gt;');
175+
toEqual('<div rel="&lt;/div&gt;"/>');
203176
});
204177

205178
it('should ignore back slash as escape', function() {
@@ -222,12 +195,6 @@ describe('HTML', function() {
222195
expectHTML('\na\n').toEqual('&#10;a&#10;');
223196
});
224197

225-
it('should accept tag delimiters such as "<" inside real tags (with nesting)', function() {
226-
//this is an integrated version of the 'should accept tag delimiters such as "<" inside real tags' test
227-
expectHTML('<p> 10 < <span>100</span> </p>')
228-
.toEqual('<p> 10 &lt; <span>100</span> </p>');
229-
});
230-
231198
describe('htmlSanitizerWriter', function() {
232199
/* global htmlSanitizeWriter: false */
233200
if (angular.isUndefined(window.htmlSanitizeWriter)) return;

0 commit comments

Comments
 (0)