Skip to content

Commit 1175a9b

Browse files
committed
Commit striptags.js
1 parent eb63495 commit 1175a9b

File tree

1 file changed

+247
-0
lines changed

1 file changed

+247
-0
lines changed

src/lib/striptags.js

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
// The MIT License (MIT)
2+
//
3+
// Copyright (c) [2017] [Eric Norris]
4+
//
5+
// Permission is hereby granted, free of charge, to any person obtaining a copy
6+
// of this software and associated documentation files (the "Software"), to deal
7+
// in the Software without restriction, including without limitation the rights
8+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
// copies of the Software, and to permit persons to whom the Software is
10+
// furnished to do so, subject to the following conditions:
11+
//
12+
// The above copyright notice and this permission notice shall be included in all
13+
// copies or substantial portions of the Software.
14+
//
15+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
// SOFTWARE.
22+
23+
'use strict';
24+
25+
(function(global) {
26+
// Minimal Symbol polyfill for IE11 and other engines without a native Symbol.
//
// The native constructor is looked up through the global object on purpose:
// the `var Symbol` declaration below is hoisted to the top of the enclosing
// function, so a bare `typeof Symbol` test in this scope always yields
// 'undefined' and the fallback would replace the native Symbol in every
// environment.
var Symbol = (function resolve_symbol() {
    var root =
        typeof globalThis === 'object' && globalThis ? globalThis :
        typeof self === 'object' && self ? self :
        typeof window === 'object' && window ? window :
        typeof global === 'object' && global ? global :
        null;

    if (root && typeof root.Symbol === 'function') {
        return root.Symbol;
    }

    // Fallback: a "symbol" is simply its description string.
    var fallback = function(name) {
        return name;
    };

    // flag checked before relying on well-known symbols such as Symbol.iterator
    fallback.nonNative = true;

    return fallback;
})();
34+
35+
// Parser states; they are only ever compared with strict equality.
const STATE_PLAINTEXT = Symbol('plaintext');
const STATE_HTML = Symbol('html');
const STATE_COMMENT = Symbol('comment');

// Finds each `<name>` entry in an allowable-tags string; group 1 is the name.
const ALLOWED_TAGS_REGEX = /<(\w*)>/g;

// Captures the name of an opening or closing tag: everything after `<` or
// `</` up to the first whitespace, '/' or '>'.
const NORMALIZE_TAG_REGEX = /<\/?([^\s\/>]+)/;
41+
42+
// One-shot entry point: builds a fresh parsing context and runs the whole
// input through striptags_internal.
//
// Falsy arguments fall back to defaults: `html` and `tag_replacement` become
// '' and `allowable_tags` becomes [].
function striptags(html, allowable_tags, tag_replacement) {
    const fresh_context = init_context(
        allowable_tags || [],
        tag_replacement || ''
    );

    return striptags_internal(html || '', fresh_context);
}
51+
52+
// Streaming entry point: creates one parsing context and returns a function
// that feeds successive chunks through striptags_internal with that same
// context, so parser state carries over from one chunk to the next.
//
// Falsy `allowable_tags` becomes [], falsy `tag_replacement` becomes '', and
// a falsy chunk is treated as ''.
function init_striptags_stream(allowable_tags, tag_replacement) {
    const shared_context = init_context(
        allowable_tags || [],
        tag_replacement || ''
    );

    function striptags_stream(html) {
        const chunk = html || '';

        return striptags_internal(chunk, shared_context);
    }

    return striptags_stream;
}

// expose the streaming factory on the main function
striptags.init_streaming_mode = init_striptags_stream;
64+
65+
// Builds the mutable state object used by striptags_internal: the parsed
// allow-list, the replacement text, and the parser state set to
// "plain text, nothing buffered, not nested, not inside a quote".
function init_context(allowable_tags, tag_replacement) {
    const context = {
        allowable_tags: parse_allowable_tags(allowable_tags),
        tag_replacement: tag_replacement,

        state: STATE_PLAINTEXT,
        tag_buffer: '',
        depth: 0,
        in_quote_char: '',
    };

    return context;
}
78+
79+
// Core state machine: walks `html` one character at a time and returns the
// text to emit for this chunk.
//
// `context` supplies the allow-list (`allowable_tags`, queried with `has`),
// the replacement text for removed tags, and the parser state (`state`,
// `tag_buffer`, `depth`, `in_quote_char`). The parser state is read on entry
// and written back on exit, so a tag split across two calls that share a
// context is still recognised.
function striptags_internal(html, context) {
    const allowable_tags = context.allowable_tags;
    const tag_replacement = context.tag_replacement;

    let state = context.state;
    let tag_buffer = context.tag_buffer;
    let depth = context.depth;
    let in_quote_char = context.in_quote_char;
    let output = '';

    for (let idx = 0, length = html.length; idx < length; idx++) {
        const char = html[idx];

        if (state === STATE_PLAINTEXT) {
            if (char === '<') {
                // possible start of a tag: buffer it until we know more
                state = STATE_HTML;
                tag_buffer += char;
            } else {
                output += char;
            }
        } else if (state === STATE_HTML) {
            switch (char) {
                case '<':
                    // ignore '<' if inside a quote
                    if (in_quote_char) {
                        break;
                    }

                    // we're seeing a nested '<'
                    depth++;
                    break;

                case '>':
                    // ignore '>' if inside a quote
                    if (in_quote_char) {
                        break;
                    }

                    // something like this is happening: '<<>>'
                    if (depth) {
                        depth--;

                        break;
                    }

                    // this is closing the tag in tag_buffer
                    in_quote_char = '';
                    state = STATE_PLAINTEXT;
                    tag_buffer += '>';

                    // keep allowed tags verbatim, replace everything else
                    if (allowable_tags.has(normalize_tag(tag_buffer))) {
                        output += tag_buffer;
                    } else {
                        output += tag_replacement;
                    }

                    tag_buffer = '';
                    break;

                case '"':
                case "'":
                    // catch both single and double quotes: the same quote
                    // character closes an open quote, the other kind is
                    // treated as ordinary content of the quoted value
                    if (char === in_quote_char) {
                        in_quote_char = '';
                    } else {
                        in_quote_char = in_quote_char || char;
                    }

                    tag_buffer += char;
                    break;

                case '-':
                    // '<!-' followed by '-' opens a comment
                    if (tag_buffer === '<!-') {
                        state = STATE_COMMENT;
                    }

                    tag_buffer += char;
                    break;

                case ' ':
                case '\n':
                    if (tag_buffer === '<') {
                        // '<' directly followed by whitespace is not a tag.
                        // Emit the whitespace character that was actually
                        // seen so a newline is not turned into a space.
                        state = STATE_PLAINTEXT;
                        output += '<' + char;
                        tag_buffer = '';

                        break;
                    }

                    tag_buffer += char;
                    break;

                default:
                    tag_buffer += char;
                    break;
            }
        } else if (state === STATE_COMMENT) {
            if (char === '>') {
                if (tag_buffer.slice(-2) === '--') {
                    // close the comment
                    state = STATE_PLAINTEXT;
                }

                // comment text is never emitted, so the buffer can be
                // dropped at every '>' whether or not the comment ended
                tag_buffer = '';
            } else {
                tag_buffer += char;
            }
        }
    }

    // save the context for future iterations
    context.state = state;
    context.tag_buffer = tag_buffer;
    context.depth = depth;
    context.in_quote_char = in_quote_char;

    return output;
}
206+
207+
// Converts the caller-supplied allow-list into a Set of tag names.
//
// Accepted inputs:
//   - a string such as '<a><strong>': every `<name>` match is added;
//   - an iterable (only when a native Symbol is available);
//   - any object with a forEach method (IE11-compatible path).
// Anything else yields an empty Set.
//
// String names are stored lower-cased: tag names taken from the markup are
// lower-cased by normalize_tag before the lookup, so an entry such as 'B' or
// '<B>' could otherwise never match.
function parse_allowable_tags(allowable_tags) {
    const tag_set = new Set();

    function add_tag(tag) {
        tag_set.add(typeof tag === 'string' ? tag.toLowerCase() : tag);
    }

    if (typeof allowable_tags === 'string') {
        let match;

        // exec() on a /g regex walks through successive matches and resets
        // itself once it returns null
        while ((match = ALLOWED_TAGS_REGEX.exec(allowable_tags))) {
            add_tag(match[1]);
        }
    } else if (
        !Symbol.nonNative &&
        typeof allowable_tags[Symbol.iterator] === 'function'
    ) {
        // materialise the iterable first, then normalise each entry
        new Set(allowable_tags).forEach(add_tag);
    } else if (typeof allowable_tags.forEach === 'function') {
        // IE11 compatible
        allowable_tags.forEach(add_tag);
    }

    return tag_set;
}
228+
229+
// Extracts the lower-cased tag name from a buffered tag such as '<A href>' or
// '</a>'. Returns null when NORMALIZE_TAG_REGEX finds no name in the buffer.
function normalize_tag(tag_buffer) {
    const found = NORMALIZE_TAG_REGEX.exec(tag_buffer);

    if (!found) {
        return null;
    }

    return found[1].toLowerCase();
}
234+
235+
// Publish striptags through whichever module system is present. AMD is
// checked first, then CommonJS; with neither, it is attached to `global`.
const amd_loader_present = typeof define === 'function' && define.amd;
const commonjs_present =
    !amd_loader_present && typeof module === 'object' && module.exports;

if (amd_loader_present) {
    // AMD
    define(function module_factory() {
        return striptags;
    });
} else if (commonjs_present) {
    // Node
    module.exports = striptags;
} else {
    // Browser
    global.striptags = striptags;
}
247+
})(this);

0 commit comments

Comments
 (0)