14
14
* Any parent.
15
15
* @typedef {'normal' | 'pre' | 'nowrap' | 'pre-wrap' } Whitespace
16
16
* Valid and useful whitespace values (from CSS).
17
- * @typedef {boolean } BreakValue
18
- * Whether there was a break.
19
- * @typedef {1 | 2 } BreakNumber
20
- * Specific break.
17
+ * @typedef {0 | 1 | 2 } BreakNumber
18
+ * Specific break:
19
+ *
20
+ * * `0` — space
21
+ * * `1` — line ending
22
+ * * `2` — blank line
21
23
* @typedef {'\n' } BreakForce
22
24
* Forced break.
25
+ * @typedef {boolean } BreakValue
26
+ * Whether there was a break.
23
27
* @typedef {BreakValue | BreakNumber | undefined } BreakBefore
24
28
* Any value for a break before.
25
29
* @typedef {BreakValue | BreakNumber | BreakForce | undefined } BreakAfter
@@ -156,6 +160,9 @@ export function toText(tree, options = {}) {
156
160
breakAfter : false
157
161
} )
158
162
163
+ /** @type {Array<string | BreakNumber> } */
164
+ const results = [ ]
165
+
159
166
// Treat `text` and `comment` as having normal white-space.
160
167
// This deviates from the spec as in the DOM the node’s `.data` has to be
161
168
// returned.
@@ -165,7 +172,13 @@ export function toText(tree, options = {}) {
165
172
// Nodes without children are treated as a void element, so `doctype` is thus
166
173
// ignored.
167
174
if ( tree . type === 'text' || tree . type === 'comment' ) {
168
- return collectText ( tree , { whitespace, breakBefore : true , breakAfter : true } )
175
+ results . push (
176
+ ...collectText ( tree , {
177
+ whitespace,
178
+ breakBefore : true ,
179
+ breakAfter : true
180
+ } )
181
+ )
169
182
}
170
183
171
184
// 1. If this element is not being rendered, or if the user agent is a
@@ -179,8 +192,6 @@ export function toText(tree, options = {}) {
179
192
// Important: we’ll have to account for this later though.
180
193
181
194
// 2. Let results be a new empty list.
182
- /** @type {Array<string | BreakNumber> } */
183
- let results = [ ]
184
195
let index = - 1
185
196
186
197
// 3. For each child node node of this element:
@@ -190,9 +201,9 @@ export function toText(tree, options = {}) {
190
201
// Each item in results will either be a JavaScript string or a
191
202
// positive integer (a required line break count).
192
203
// 3.2. For each item item in current, append item to results.
193
- results = results . concat (
204
+ results . push (
194
205
// @ts -expect-error Looks like a parent.
195
- innerTextCollection ( children [ index ] , tree , {
206
+ ... innerTextCollection ( children [ index ] , tree , {
196
207
whitespace,
197
208
breakBefore : index ? undefined : block ,
198
209
breakAfter :
@@ -221,8 +232,11 @@ export function toText(tree, options = {}) {
221
232
if ( typeof value === 'number' ) {
222
233
if ( count !== undefined && value > count ) count = value
223
234
} else if ( value ) {
224
- if ( count ) result . push ( '\n' . repeat ( count ) )
225
- count = 0
235
+ if ( count !== undefined && count > - 1 ) {
236
+ result . push ( '\n' . repeat ( count ) || ' ' )
237
+ }
238
+
239
+ count = - 1
226
240
result . push ( value )
227
241
}
228
242
}
@@ -245,11 +259,9 @@ function innerTextCollection(node, parent, info) {
245
259
}
246
260
247
261
if ( node . type === 'text' ) {
248
- return [
249
- info . whitespace === 'normal'
250
- ? collectText ( node , info )
251
- : collectPreText ( node )
252
- ]
262
+ return info . whitespace === 'normal'
263
+ ? collectText ( node , info )
264
+ : collectPreText ( node )
253
265
}
254
266
255
267
return [ ]
@@ -259,8 +271,11 @@ function innerTextCollection(node, parent, info) {
259
271
* Collect an element.
260
272
*
261
273
* @param {Element } node
274
+ * Element node.
262
275
* @param {Parent } parent
263
276
* @param {CollectionInfo } info
277
+ * Info on current collection.
278
+ * @returns {Array<string | BreakNumber> }
264
279
*/
265
280
function collectElement ( node , parent , info ) {
266
281
// First we infer the `white-space` property.
@@ -376,18 +391,21 @@ function collectElement(node, parent, info) {
376
391
* See: <https://drafts.csswg.org/css-text/#white-space-phase-1>
377
392
*
378
393
* @param {Text | Comment } node
394
+ * Text node.
379
395
* @param {CollectionInfo } info
380
- * @returns {string }
396
+ * Info on current collection.
397
+ * @returns {Array<string | BreakNumber> }
398
+ * Result.
381
399
*/
382
400
function collectText ( node , info ) {
383
401
const value = String ( node . value )
384
402
/** @type {Array<string> } */
385
403
const lines = [ ]
386
- /** @type {Array<string> } */
404
+ /** @type {Array<string | BreakNumber > } */
387
405
const result = [ ]
388
406
let start = 0
389
407
390
- while ( start < value . length ) {
408
+ while ( start <= value . length ) {
391
409
searchLineFeeds . lastIndex = start
392
410
393
411
const match = searchLineFeeds . exec ( value )
@@ -397,14 +415,14 @@ function collectText(node, info) {
397
415
// Any sequence of collapsible spaces and tabs immediately preceding or
398
416
// following a segment break is removed.
399
417
trimAndCollapseSpacesAndTabs (
400
- // [... ] ignoring bidi formatting characters (characters with the
418
+ // [… ] ignoring bidi formatting characters (characters with the
401
419
// Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if
402
420
// they were not there.
403
421
value
404
422
. slice ( start , end )
405
423
. replace ( / [ \u061C \u200E \u200F \u202A - \u202E \u2066 - \u2069 ] / g, '' ) ,
406
- info . breakBefore ,
407
- info . breakAfter
424
+ start === 0 ? info . breakBefore : true ,
425
+ end === value . length ? info . breakAfter : true
408
426
)
409
427
)
410
428
@@ -417,7 +435,8 @@ function collectText(node, info) {
417
435
// Any collapsible segment break immediately following another collapsible
418
436
// segment break is removed
419
437
let index = - 1
420
- let join = ''
438
+ /** @type {BreakNumber | undefined } */
439
+ let join
421
440
422
441
while ( ++ index < lines . length ) {
423
442
// * If the character immediately before or immediately after the segment
@@ -429,7 +448,7 @@ function collectText(node, info) {
429
448
lines [ index + 1 ] . charCodeAt ( 0 ) === 0x200b ) /* ZWSP */
430
449
) {
431
450
result . push ( lines [ index ] )
432
- join = ''
451
+ join = undefined
433
452
}
434
453
435
454
// * Otherwise, if the East Asian Width property [UAX11] of both the
@@ -449,21 +468,30 @@ function collectText(node, info) {
449
468
450
469
// * Otherwise, the segment break is converted to a space (U+0020).
451
470
else if ( lines [ index ] ) {
452
- if ( join ) result . push ( join )
471
+ if ( typeof join === 'number' ) result . push ( join )
453
472
result . push ( lines [ index ] )
454
- join = ' '
473
+ join = 0
474
+ } else if ( index === 0 || index === lines . length - 1 ) {
475
+ // If this line is empty, and it’s the first or last, add a space.
476
+ // Note that this function is only called in normal whitespace, so we
477
+ // don’t worry about `pre`.
478
+ result . push ( 0 )
455
479
}
456
480
}
457
481
458
- return result . join ( '' )
482
+ return result
459
483
}
460
484
461
485
/**
462
- * @param {Text | Comment } node
463
- * @returns {string }
486
+ * Collect a text node as “pre” whitespace.
487
+ *
488
+ * @param {Text } node
489
+ * Text node.
490
+ * @returns {Array<string | BreakNumber> }
491
+ * Result.
464
492
*/
465
493
function collectPreText ( node ) {
466
- return String ( node . value )
494
+ return [ String ( node . value ) ]
467
495
}
468
496
469
497
/**
@@ -475,9 +503,13 @@ function collectPreText(node) {
475
503
* but retains its soft wrap opportunity, if any.)
476
504
*
477
505
* @param {string } value
506
+ * Value to collapse.
478
507
* @param {BreakBefore } breakBefore
508
+ * Whether there was a break before.
479
509
* @param {BreakAfter } breakAfter
510
+ * Whether there was a break after.
480
511
* @returns {string }
512
+ * Result.
481
513
*/
482
514
function trimAndCollapseSpacesAndTabs ( value , breakBefore , breakAfter ) {
483
515
/** @type {Array<string> } */
@@ -515,11 +547,16 @@ function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) {
515
547
}
516
548
517
549
/**
550
+ * Figure out the whitespace of a node.
551
+ *
518
552
* We don’t support void elements here (so `nobr wbr` -> `normal` is ignored).
519
553
*
520
554
* @param {Node } node
555
+ * Node (typically `Element`).
521
556
* @param {CollectionInfo } info
557
+ * Info on current collection.
522
558
* @returns {Whitespace }
559
+ * Applied whitespace.
523
560
*/
524
561
function inferWhitespace ( node , info ) {
525
562
if ( node . type === 'element' ) {
0 commit comments