|
1 |
| -import {embedded} from 'hast-util-embedded' |
2 |
| -import {convertElement} from 'hast-util-is-element' |
3 |
| -import {phrasing} from 'hast-util-phrasing' |
4 |
| -import toString from 'hast-util-to-string' |
5 |
| -import {whitespace} from 'hast-util-whitespace' |
6 |
| -import {toString as nlcstToString} from 'nlcst-to-string' |
7 |
| -import {pointStart} from 'unist-util-position' |
8 |
| -import vfileLocation from 'vfile-location' |
9 |
| - |
10 |
// Shared reference to the native array `push`, used with `.apply` to append
// many items at once.
const push = Array.prototype.push

// Elements whose text content is tokenized as source instead of prose:
// `<code>`, or anything accepted by `dataNlcstSourced`.
const source = convertElement(['code', dataNlcstSourced])

// Elements that are skipped entirely, together with everything inside them.
const ignore = convertElement([
  'script',
  'style',
  'svg',
  'math',
  'del',
  dataNlcstIgnore
])

// Elements that always form a paragraph of their own.
const explicit = convertElement(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])

// Elements whose children are scanned for implicit paragraphs (runs of
// phrasing content).
const flowAccepting = convertElement([
  'body', 'article', 'section', 'blockquote', 'nav', 'aside',
  'header', 'footer', 'address', 'li', 'dt', 'dd',
  'figure', 'figcaption', 'div', 'main', 'caption', 'td',
  'th', 'form', 'fieldset', 'details', 'dialog'
])

// Elements that are replaced by their children before implicit paragraphs are
// detected.
// See: <https://html.spec.whatwg.org/multipage/dom.html#paragraphs>
const unravelInParagraph = convertElement(['a', 'ins', 'del', 'map'])
51 |
| - |
52 |
/**
 * Transform a hast `tree` into an nlcst `RootNode`.
 *
 * @param tree
 *   hast node to transform; must have a `type` and a start position with a
 *   truthy `line` and `column`.
 * @param file
 *   File the tree was parsed from; must have a `messages` field.
 *   `String(file)` is used as the document that offsets and points refer to.
 * @param Parser
 *   Either an object that has a `parse` property (used as is) or a
 *   constructor (instantiated with `new`).
 * @returns
 *   Object of type `'RootNode'` whose children are the paragraphs produced by
 *   `parser.tokenizeParagraph`, positioned over the whole document.
 * @throws {Error}
 *   When `tree`, `file`, or `Parser` is missing, or when `tree` has no
 *   positional info.
 */
export function toNlcst(tree, file, Parser) {
  // Reject invalid arguments before doing any work.
  if (!tree || !tree.type) {
    throw new Error('hast-util-to-nlcst expected node')
  }

  if (!file || !file.messages) {
    throw new Error('hast-util-to-nlcst expected file')
  }

  if (!Parser) {
    throw new Error('hast-util-to-nlcst expected parser')
  }

  // NOTE(review): only the given node's position is validated here; nested
  // nodes are assumed to carry positions as well. Confirm with callers.
  const origin = pointStart(tree)

  if (!origin.line || !origin.column) {
    throw new Error('hast-util-to-nlcst expected position on nodes')
  }

  const text = String(file)
  const place = vfileLocation(text)
  const parser = 'parse' in Parser ? Parser : new Parser()

  // Paragraphs are collected here while walking the tree.
  const paragraphs = []

  visit(tree)

  return {
    type: 'RootNode',
    children: paragraphs,
    position: {start: place.toPoint(0), end: place.toPoint(text.length)}
  }

  // Walk `node`, looking for content that should become paragraphs.
  function visit(node) {
    if (node.type === 'root') {
      visitAll(node.children)
      return
    }

    if (node.type !== 'element' || ignore(node)) {
      return
    }

    if (explicit(node)) {
      // The element itself is one paragraph.
      paragraph(node)
    } else if (flowAccepting(node)) {
      // Slightly simplified version of: <https://html.spec.whatwg.org/#paragraphs>.
      implicit(unravel(node.children))
    } else {
      // Nothing special about this element: look inside it.
      visitAll(node.children)
    }
  }

  // Walk every node in `children`, in order.
  function visitAll(children) {
    for (const child of children) {
      visit(child)
    }
  }

  // Replace (recursively) every child matching `unravelInParagraph` with its
  // own children, returning a new flat list.
  function unravel(children) {
    const flat = []

    for (const child of children) {
      if (unravelInParagraph(child)) {
        flat.push(...unravel(child.children))
      } else {
        flat.push(child)
      }
    }

    return flat
  }

  // Convert a hast node, or a list of hast nodes, and store the result as one
  // paragraph when it is not empty.
  function paragraph(nodeOrNodes) {
    const content =
      'length' in nodeOrNodes ? all(nodeOrNodes) : one(nodeOrNodes)

    if (content.length > 0) {
      paragraphs.push(parser.tokenizeParagraph(content))
    }
  }

  // Split `children` into runs of phrasing content.
  // A run becomes a paragraph when it holds something other than embedded
  // content and whitespace; otherwise its nodes are walked individually.
  // Non-phrasing children are walked on their own.
  function implicit(children) {
    let runStart = -1
    let viable = false

    // Deliberately iterates one past the last child so that a trailing run is
    // flushed as well.
    for (let index = 0; index <= children.length; index++) {
      const child = children[index]

      if (child && phrasing(child)) {
        if (runStart === -1) {
          runStart = index
        }

        if (!viable && !embedded(child) && !whitespace(child)) {
          viable = true
        }
      } else if (child && runStart === -1) {
        visit(child)
        runStart = index + 1
      } else if (runStart !== -1) {
        const run = children.slice(runStart, index)

        if (viable) {
          paragraph(run)
        } else {
          visitAll(run)
        }

        if (child) {
          visit(child)
        }

        viable = false
        runStart = -1
      }
    }
  }

  // Convert one hast `node` to a list of nlcst nodes.
  // Yields `undefined` for nodes that are neither text nor a non-ignored
  // element.
  function one(node) {
    if (node.type === 'text') {
      return positioned(parser.tokenize(node.value), node)
    }

    if (node.type !== 'element' || ignore(node)) {
      return undefined
    }

    if (node.tagName === 'wbr') {
      return positioned([parser.tokenizeWhiteSpace(' ')], node)
    }

    if (node.tagName === 'br') {
      return positioned([parser.tokenizeWhiteSpace('\n')], node)
    }

    if (source(node)) {
      return positioned([parser.tokenizeSource(toString(node))], node)
    }

    // Other elements contribute whatever their children produce; those nodes
    // were already positioned when they were created.
    return all(node.children)
  }

  // Convert every hast node in `children` and concatenate the results.
  function all(children) {
    const nodes = []

    for (const child of children) {
      const converted = one(child)

      if (converted) {
        nodes.push(...converted)
      }
    }

    return nodes
  }

  // Position freshly created nlcst `nodes`, starting at the offset where the
  // hast node `from` starts.
  function positioned(nodes, from) {
    return patch(nodes, place.toOffset(pointStart(from)))
  }

  // Set `position` on each node in `nodes` (and on their descendants).
  // `offset` is the offset in the document this run of content starts at.
  //
  // nlcst nodes are concrete: the length of a node's serialized content is
  // used to find where it ends and the next one starts.
  function patch(nodes, offset) {
    let cursor = offset

    for (const node of nodes) {
      if (node.children) {
        patch(node.children, cursor)
      }

      const end = cursor + nlcstToString(node).length

      node.position = {
        start: place.toPoint(cursor),
        end: place.toPoint(end)
      }

      cursor = end
    }

    return nodes
  }
}
243 |
| - |
244 |
/**
 * Check whether `node` is marked as source through its `dataNlcst` property.
 *
 * Nodes without a `properties` object are treated as not marked instead of
 * throwing, because `properties` may be absent on hand-built trees.
 *
 * @param node
 *   Node to check.
 * @returns {boolean}
 *   `true` when `node.properties.dataNlcst` is exactly `'source'`.
 */
function dataNlcstSourced(node) {
  return Boolean(node.properties && node.properties.dataNlcst === 'source')
}
247 |
| - |
248 |
/**
 * Check whether `node` is marked as ignored through its `dataNlcst` property.
 *
 * Nodes without a `properties` object are treated as not marked instead of
 * throwing, because `properties` may be absent on hand-built trees.
 *
 * @param node
 *   Node to check.
 * @returns {boolean}
 *   `true` when `node.properties.dataNlcst` is exactly `'ignore'`.
 */
function dataNlcstIgnore(node) {
  return Boolean(node.properties && node.properties.dataNlcst === 'ignore')
}
| 1 | +export {toNlcst} from './lib/index.js' |
0 commit comments