|
1 |
| -import Parser from 'parse5/lib/parser/index.js' |
2 |
| -import {pointStart, pointEnd} from 'unist-util-position' |
3 |
| -import {visit} from 'unist-util-visit' |
4 |
| -import {fromParse5} from 'hast-util-from-parse5' |
5 |
| -import {toParse5} from 'hast-util-to-parse5' |
6 |
| -import {htmlVoidElements} from 'html-void-elements' |
7 |
| -import {webNamespaces} from 'web-namespaces' |
8 |
| -import {zwitch} from 'zwitch' |
| 1 | +/** |
| 2 | + * @typedef {import('./lib/index.js').Options} Options |
| 3 | + */ |
9 | 4 |
|
10 |
| -var inTemplateMode = 'IN_TEMPLATE_MODE' |
11 |
| -var dataState = 'DATA_STATE' |
12 |
| -var characterToken = 'CHARACTER_TOKEN' |
13 |
| -var startTagToken = 'START_TAG_TOKEN' |
14 |
| -var endTagToken = 'END_TAG_TOKEN' |
15 |
| -var commentToken = 'COMMENT_TOKEN' |
16 |
| -var doctypeToken = 'DOCTYPE_TOKEN' |
17 |
| - |
18 |
| -var parseOptions = {sourceCodeLocationInfo: true, scriptingEnabled: false} |
19 |
| - |
20 |
| -export function raw(tree, file, options) { |
21 |
| - var parser = new Parser(parseOptions) |
22 |
| - var one = zwitch('type', { |
23 |
| - handlers: { |
24 |
| - root, |
25 |
| - element, |
26 |
| - text, |
27 |
| - comment, |
28 |
| - doctype, |
29 |
| - raw: handleRaw |
30 |
| - }, |
31 |
| - unknown |
32 |
| - }) |
33 |
| - var stitches |
34 |
| - var tokenizer |
35 |
| - var preprocessor |
36 |
| - var posTracker |
37 |
| - var locationTracker |
38 |
| - var result |
39 |
| - var index |
40 |
| - |
41 |
| - if (file && !('contents' in file)) { |
42 |
| - options = file |
43 |
| - file = undefined |
44 |
| - } |
45 |
| - |
46 |
| - if (options && options.passThrough) { |
47 |
| - index = -1 |
48 |
| - |
49 |
| - while (++index < options.passThrough.length) { |
50 |
| - one.handlers[options.passThrough[index]] = stitch |
51 |
| - } |
52 |
| - } |
53 |
| - |
54 |
| - result = fromParse5(documentMode(tree) ? document() : fragment(), file) |
55 |
| - |
56 |
| - if (stitches) { |
57 |
| - visit(result, 'comment', mend) |
58 |
| - } |
59 |
| - |
60 |
| - // Unpack if possible and when not given a `root`. |
61 |
| - if (tree.type !== 'root' && result.children.length === 1) { |
62 |
| - return result.children[0] |
63 |
| - } |
64 |
| - |
65 |
| - return result |
66 |
| - |
67 |
| - function mend(node, index, parent) { |
68 |
| - if (node.value.stitch) { |
69 |
| - parent.children[index] = node.value.stitch |
70 |
| - return index |
71 |
| - } |
72 |
| - } |
73 |
| - |
74 |
| - function fragment() { |
75 |
| - var context = { |
76 |
| - nodeName: 'template', |
77 |
| - tagName: 'template', |
78 |
| - attrs: [], |
79 |
| - namespaceURI: webNamespaces.html, |
80 |
| - childNodes: [] |
81 |
| - } |
82 |
| - var mock = { |
83 |
| - nodeName: 'documentmock', |
84 |
| - tagName: 'documentmock', |
85 |
| - attrs: [], |
86 |
| - namespaceURI: webNamespaces.html, |
87 |
| - childNodes: [] |
88 |
| - } |
89 |
| - var doc = {nodeName: '#document-fragment', childNodes: []} |
90 |
| - |
91 |
| - parser._bootstrap(mock, context) |
92 |
| - parser._pushTmplInsertionMode(inTemplateMode) |
93 |
| - parser._initTokenizerForFragmentParsing() |
94 |
| - parser._insertFakeRootElement() |
95 |
| - parser._resetInsertionMode() |
96 |
| - parser._findFormInFragmentContext() |
97 |
| - |
98 |
| - tokenizer = parser.tokenizer |
99 |
| - preprocessor = tokenizer.preprocessor |
100 |
| - locationTracker = tokenizer.__mixins[0] |
101 |
| - posTracker = locationTracker.posTracker |
102 |
| - |
103 |
| - one(tree) |
104 |
| - |
105 |
| - parser._adoptNodes(mock.childNodes[0], doc) |
106 |
| - |
107 |
| - return doc |
108 |
| - } |
109 |
| - |
110 |
| - function document() { |
111 |
| - var doc = parser.treeAdapter.createDocument() |
112 |
| - |
113 |
| - parser._bootstrap(doc, null) |
114 |
| - tokenizer = parser.tokenizer |
115 |
| - preprocessor = tokenizer.preprocessor |
116 |
| - locationTracker = tokenizer.__mixins[0] |
117 |
| - posTracker = locationTracker.posTracker |
118 |
| - |
119 |
| - one(tree) |
120 |
| - |
121 |
| - return doc |
122 |
| - } |
123 |
| - |
124 |
| - function all(nodes) { |
125 |
| - var index = -1 |
126 |
| - |
127 |
| - /* istanbul ignore else - invalid nodes, see rehypejs/rehype-raw#7. */ |
128 |
| - if (nodes) { |
129 |
| - while (++index < nodes.length) { |
130 |
| - one(nodes[index]) |
131 |
| - } |
132 |
| - } |
133 |
| - } |
134 |
| - |
135 |
| - function root(node) { |
136 |
| - all(node.children) |
137 |
| - } |
138 |
| - |
139 |
| - function element(node) { |
140 |
| - resetTokenizer() |
141 |
| - parser._processToken(startTag(node), webNamespaces.html) |
142 |
| - |
143 |
| - all(node.children) |
144 |
| - |
145 |
| - if (!htmlVoidElements.includes(node.tagName)) { |
146 |
| - resetTokenizer() |
147 |
| - parser._processToken(endTag(node)) |
148 |
| - } |
149 |
| - } |
150 |
| - |
151 |
| - function text(node) { |
152 |
| - resetTokenizer() |
153 |
| - parser._processToken({ |
154 |
| - type: characterToken, |
155 |
| - chars: node.value, |
156 |
| - location: createParse5Location(node) |
157 |
| - }) |
158 |
| - } |
159 |
| - |
160 |
| - function doctype(node) { |
161 |
| - var p5 = toParse5(node) |
162 |
| - resetTokenizer() |
163 |
| - parser._processToken({ |
164 |
| - type: doctypeToken, |
165 |
| - name: p5.name, |
166 |
| - forceQuirks: false, |
167 |
| - publicId: p5.publicId, |
168 |
| - systemId: p5.systemId, |
169 |
| - location: createParse5Location(node) |
170 |
| - }) |
171 |
| - } |
172 |
| - |
173 |
| - function comment(node) { |
174 |
| - resetTokenizer() |
175 |
| - parser._processToken({ |
176 |
| - type: commentToken, |
177 |
| - data: node.value, |
178 |
| - location: createParse5Location(node) |
179 |
| - }) |
180 |
| - } |
181 |
| - |
182 |
| - function handleRaw(node) { |
183 |
| - var start = pointStart(node) |
184 |
| - var line = start.line || 1 |
185 |
| - var column = start.column || 1 |
186 |
| - var offset = start.offset || 0 |
187 |
| - var token |
188 |
| - |
189 |
| - // Reset preprocessor: |
190 |
| - // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/preprocessor.js>. |
191 |
| - preprocessor.html = null |
192 |
| - preprocessor.pos = -1 |
193 |
| - preprocessor.lastGapPos = -1 |
194 |
| - preprocessor.lastCharPos = -1 |
195 |
| - preprocessor.gapStack = [] |
196 |
| - preprocessor.skipNextNewLine = false |
197 |
| - preprocessor.lastChunkWritten = false |
198 |
| - preprocessor.endOfChunkHit = false |
199 |
| - |
200 |
| - // Reset preprocessor mixin: |
201 |
| - // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js>. |
202 |
| - posTracker.isEol = false |
203 |
| - posTracker.lineStartPos = -column + 1 // Looks weird, but ensures we get correct positional info. |
204 |
| - posTracker.droppedBufferSize = offset |
205 |
| - posTracker.offset = 0 |
206 |
| - posTracker.col = 1 |
207 |
| - posTracker.line = line |
208 |
| - |
209 |
| - // Reset location tracker: |
210 |
| - // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js>. |
211 |
| - locationTracker.currentAttrLocation = null |
212 |
| - locationTracker.ctLoc = createParse5Location(node) |
213 |
| - |
214 |
| - // See the code for `parse` and `parseFragment`: |
215 |
| - // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/parser/index.js#L371>. |
216 |
| - tokenizer.write(node.value) |
217 |
| - parser._runParsingLoop(null) |
218 |
| - |
219 |
| - // Process final characters if they’re still there after hibernating. |
220 |
| - // Similar to: |
221 |
| - // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js#L95>. |
222 |
| - token = tokenizer.currentCharacterToken |
223 |
| - |
224 |
| - if (token) { |
225 |
| - token.location.endLine = posTracker.line |
226 |
| - token.location.endCol = posTracker.col + 1 |
227 |
| - token.location.endOffset = posTracker.offset + 1 |
228 |
| - parser._processToken(token) |
229 |
| - } |
230 |
| - } |
231 |
| - |
232 |
| - function stitch(node) { |
233 |
| - var clone = Object.assign({}, node) |
234 |
| - |
235 |
| - stitches = true |
236 |
| - |
237 |
| - // Recurse, because to somewhat handle `[<x>]</x>` (where `[]` denotes the |
238 |
| - // passed through node). |
239 |
| - if (node.children) { |
240 |
| - clone.children = raw( |
241 |
| - {type: 'root', children: node.children}, |
242 |
| - file, |
243 |
| - options |
244 |
| - ).children |
245 |
| - } |
246 |
| - |
247 |
| - // Hack: `value` is supposed to be a string, but as none of the tools |
248 |
| - // (`parse5` or `hast-util-from-parse5`) looks at it, we can pass nodes |
249 |
| - // through. |
250 |
| - comment({value: {stitch: clone}}) |
251 |
| - } |
252 |
| - |
253 |
| - function resetTokenizer() { |
254 |
| - // Reset tokenizer: |
255 |
| - // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/index.js#L218-L234>. |
256 |
| - // Especially putting it back in the `data` state is useful: some elements, |
257 |
| - // like textareas and iframes, change the state. |
258 |
| - // See GH-7. |
259 |
| - // But also if broken HTML is in `raw`, and then a correct element is given. |
260 |
| - // See GH-11. |
261 |
| - tokenizer.tokenQueue = [] |
262 |
| - tokenizer.state = dataState |
263 |
| - tokenizer.returnState = '' |
264 |
| - tokenizer.charRefCode = -1 |
265 |
| - tokenizer.tempBuff = [] |
266 |
| - tokenizer.lastStartTagName = '' |
267 |
| - tokenizer.consumedAfterSnapshot = -1 |
268 |
| - tokenizer.active = false |
269 |
| - tokenizer.currentCharacterToken = null |
270 |
| - tokenizer.currentToken = null |
271 |
| - tokenizer.currentAttr = null |
272 |
| - } |
273 |
| -} |
274 |
| - |
275 |
| -function startTag(node) { |
276 |
| - var location = createParse5Location(node) |
277 |
| - |
278 |
| - location.startTag = Object.assign({}, location) |
279 |
| - |
280 |
| - return { |
281 |
| - type: startTagToken, |
282 |
| - tagName: node.tagName, |
283 |
| - selfClosing: false, |
284 |
| - attrs: attributes(node), |
285 |
| - location |
286 |
| - } |
287 |
| -} |
288 |
| - |
289 |
| -function attributes(node) { |
290 |
| - return toParse5({ |
291 |
| - tagName: node.tagName, |
292 |
| - type: 'element', |
293 |
| - properties: node.properties |
294 |
| - }).attrs |
295 |
| -} |
296 |
| - |
297 |
| -function endTag(node) { |
298 |
| - var location = createParse5Location(node) |
299 |
| - |
300 |
| - location.endTag = Object.assign({}, location) |
301 |
| - |
302 |
| - return { |
303 |
| - type: endTagToken, |
304 |
| - tagName: node.tagName, |
305 |
| - attrs: [], |
306 |
| - location |
307 |
| - } |
308 |
| -} |
309 |
| - |
310 |
| -function unknown(node) { |
311 |
| - throw new Error('Cannot compile `' + node.type + '` node') |
312 |
| -} |
313 |
| - |
314 |
| -function documentMode(node) { |
315 |
| - var head = node.type === 'root' ? node.children[0] : node |
316 |
| - |
317 |
| - return head && (head.type === 'doctype' || head.tagName === 'html') |
318 |
| -} |
319 |
| - |
320 |
| -function createParse5Location(node) { |
321 |
| - var start = pointStart(node) |
322 |
| - var end = pointEnd(node) |
323 |
| - |
324 |
| - return { |
325 |
| - startLine: start.line, |
326 |
| - startCol: start.column, |
327 |
| - startOffset: start.offset, |
328 |
| - endLine: end.line, |
329 |
| - endCol: end.column, |
330 |
| - endOffset: end.offset |
331 |
| - } |
332 |
| -} |
| 5 | +export {raw} from './lib/index.js' |
0 commit comments