Skip to content

Commit 387eff4

Browse files
committed
Fix line endings around element breaks in text
Closes GH-3.
1 parent 4de6078 commit 387eff4

File tree

2 files changed

+141
-30
lines changed

2 files changed

+141
-30
lines changed

lib/index.js

Lines changed: 67 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,16 @@
1414
* Any parent.
1515
* @typedef {'normal' | 'pre' | 'nowrap' | 'pre-wrap'} Whitespace
1616
* Valid and useful whitespace values (from CSS).
17-
* @typedef {boolean} BreakValue
18-
* Whether there was a break.
19-
* @typedef {1 | 2} BreakNumber
20-
* Specific break.
17+
* @typedef {0 | 1 | 2} BreakNumber
18+
* Specific break:
19+
*
20+
* * `0` — space
21+
* * `1` — line ending
22+
* * `2` — blank line
2123
* @typedef {'\n'} BreakForce
2224
* Forced break.
25+
* @typedef {boolean} BreakValue
26+
* Whether there was a break.
2327
* @typedef {BreakValue | BreakNumber | undefined} BreakBefore
2428
* Any value for a break before.
2529
* @typedef {BreakValue | BreakNumber | BreakForce | undefined} BreakAfter
@@ -156,6 +160,9 @@ export function toText(tree, options = {}) {
156160
breakAfter: false
157161
})
158162

163+
/** @type {Array<string | BreakNumber>} */
164+
const results = []
165+
159166
// Treat `text` and `comment` as having normal white-space.
160167
// This deviates from the spec as in the DOM the node’s `.data` has to be
161168
// returned.
@@ -165,7 +172,13 @@ export function toText(tree, options = {}) {
165172
// Nodes without children are treated as a void element, so `doctype` is thus
166173
// ignored.
167174
if (tree.type === 'text' || tree.type === 'comment') {
168-
return collectText(tree, {whitespace, breakBefore: true, breakAfter: true})
175+
results.push(
176+
...collectText(tree, {
177+
whitespace,
178+
breakBefore: true,
179+
breakAfter: true
180+
})
181+
)
169182
}
170183

171184
// 1. If this element is not being rendered, or if the user agent is a
@@ -179,8 +192,6 @@ export function toText(tree, options = {}) {
179192
// Important: we’ll have to account for this later though.
180193

181194
// 2. Let results be a new empty list.
182-
/** @type {Array<string | BreakNumber>} */
183-
let results = []
184195
let index = -1
185196

186197
// 3. For each child node node of this element:
@@ -190,9 +201,9 @@ export function toText(tree, options = {}) {
190201
// Each item in results will either be a JavaScript string or a
191202
// positive integer (a required line break count).
192203
// 3.2. For each item item in current, append item to results.
193-
results = results.concat(
204+
results.push(
194205
// @ts-expect-error Looks like a parent.
195-
innerTextCollection(children[index], tree, {
206+
...innerTextCollection(children[index], tree, {
196207
whitespace,
197208
breakBefore: index ? undefined : block,
198209
breakAfter:
@@ -221,8 +232,11 @@ export function toText(tree, options = {}) {
221232
if (typeof value === 'number') {
222233
if (count !== undefined && value > count) count = value
223234
} else if (value) {
224-
if (count) result.push('\n'.repeat(count))
225-
count = 0
235+
if (count !== undefined && count > -1) {
236+
result.push('\n'.repeat(count) || ' ')
237+
}
238+
239+
count = -1
226240
result.push(value)
227241
}
228242
}
@@ -245,11 +259,9 @@ function innerTextCollection(node, parent, info) {
245259
}
246260

247261
if (node.type === 'text') {
248-
return [
249-
info.whitespace === 'normal'
250-
? collectText(node, info)
251-
: collectPreText(node)
252-
]
262+
return info.whitespace === 'normal'
263+
? collectText(node, info)
264+
: collectPreText(node)
253265
}
254266

255267
return []
@@ -259,8 +271,11 @@ function innerTextCollection(node, parent, info) {
259271
* Collect an element.
260272
*
261273
* @param {Element} node
274+
* Element node.
262275
* @param {Parent} parent
263276
* @param {CollectionInfo} info
277+
* Info on current collection.
278+
* @returns {Array<string | BreakNumber>}
264279
*/
265280
function collectElement(node, parent, info) {
266281
// First we infer the `white-space` property.
@@ -376,18 +391,21 @@ function collectElement(node, parent, info) {
376391
* See: <https://drafts.csswg.org/css-text/#white-space-phase-1>
377392
*
378393
* @param {Text | Comment} node
394+
* Text node.
379395
* @param {CollectionInfo} info
380-
* @returns {string}
396+
* Info on current collection.
397+
* @returns {Array<string | BreakNumber>}
398+
* Result.
381399
*/
382400
function collectText(node, info) {
383401
const value = String(node.value)
384402
/** @type {Array<string>} */
385403
const lines = []
386-
/** @type {Array<string>} */
404+
/** @type {Array<string | BreakNumber>} */
387405
const result = []
388406
let start = 0
389407

390-
while (start < value.length) {
408+
while (start <= value.length) {
391409
searchLineFeeds.lastIndex = start
392410

393411
const match = searchLineFeeds.exec(value)
@@ -397,14 +415,14 @@ function collectText(node, info) {
397415
// Any sequence of collapsible spaces and tabs immediately preceding or
398416
// following a segment break is removed.
399417
trimAndCollapseSpacesAndTabs(
400-
// [...] ignoring bidi formatting characters (characters with the
418+
// […] ignoring bidi formatting characters (characters with the
401419
// Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if
402420
// they were not there.
403421
value
404422
.slice(start, end)
405423
.replace(/[\u061C\u200E\u200F\u202A-\u202E\u2066-\u2069]/g, ''),
406-
info.breakBefore,
407-
info.breakAfter
424+
start === 0 ? info.breakBefore : true,
425+
end === value.length ? info.breakAfter : true
408426
)
409427
)
410428

@@ -417,7 +435,8 @@ function collectText(node, info) {
417435
// Any collapsible segment break immediately following another collapsible
418436
// segment break is removed
419437
let index = -1
420-
let join = ''
438+
/** @type {BreakNumber | undefined} */
439+
let join
421440

422441
while (++index < lines.length) {
423442
// * If the character immediately before or immediately after the segment
@@ -429,7 +448,7 @@ function collectText(node, info) {
429448
lines[index + 1].charCodeAt(0) === 0x200b) /* ZWSP */
430449
) {
431450
result.push(lines[index])
432-
join = ''
451+
join = undefined
433452
}
434453

435454
// * Otherwise, if the East Asian Width property [UAX11] of both the
@@ -449,21 +468,30 @@ function collectText(node, info) {
449468

450469
// * Otherwise, the segment break is converted to a space (U+0020).
451470
else if (lines[index]) {
452-
if (join) result.push(join)
471+
if (typeof join === 'number') result.push(join)
453472
result.push(lines[index])
454-
join = ' '
473+
join = 0
474+
} else if (index === 0 || index === lines.length - 1) {
475+
// If this line is empty, and it’s the first or last, add a space.
476+
// Note that this function is only called in normal whitespace, so we
477+
// don’t worry about `pre`.
478+
result.push(0)
455479
}
456480
}
457481

458-
return result.join('')
482+
return result
459483
}
460484

461485
/**
462-
* @param {Text | Comment} node
463-
* @returns {string}
486+
* Collect a text node as “pre” whitespace.
487+
*
488+
* @param {Text} node
489+
* Text node.
490+
* @returns {Array<string | BreakNumber>}
491+
* Result.
464492
*/
465493
function collectPreText(node) {
466-
return String(node.value)
494+
return [String(node.value)]
467495
}
468496

469497
/**
@@ -475,9 +503,13 @@ function collectPreText(node) {
475503
* but retains its soft wrap opportunity, if any.)
476504
*
477505
* @param {string} value
506+
* Value to collapse.
478507
* @param {BreakBefore} breakBefore
508+
* Whether there was a break before.
479509
* @param {BreakAfter} breakAfter
510+
* Whether there was a break after.
480511
* @returns {string}
512+
* Result.
481513
*/
482514
function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) {
483515
/** @type {Array<string>} */
@@ -515,11 +547,16 @@ function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) {
515547
}
516548

517549
/**
550+
* Figure out the whitespace of a node.
551+
*
518552
* We don’t support void elements here (so `nobr wbr` -> `normal` is ignored).
519553
*
520554
* @param {Node} node
555+
* Node (typically `Element`).
521556
* @param {CollectionInfo} info
557+
* Info on current collection.
522558
* @returns {Whitespace}
559+
* Applied whitespace.
523560
*/
524561
function inferWhitespace(node, info) {
525562
if (node.type === 'element') {

test.js

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,77 @@ test('non-normal white-space', () => {
319319
'should support a `textarea` element'
320320
)
321321
})
322+
323+
test('more whitespace', () => {
324+
assert.equal(
325+
toText(h('p', ['A\n', h('span', 'b')])),
326+
'A b',
327+
'should support line endings around element breaks (1)'
328+
)
329+
330+
assert.equal(
331+
toText(h('p', ['A\nb', h('span', 'c')])),
332+
'A bc',
333+
'should support line endings around element breaks (2)'
334+
)
335+
336+
assert.equal(
337+
toText(h('p', ['A', h('span', '\nb')])),
338+
'A b',
339+
'should support line endings around element breaks (3)'
340+
)
341+
342+
assert.equal(
343+
toText(h('p', ['A\n', h('span', '\nb')])),
344+
'A b',
345+
'should support line endings around element breaks (4)'
346+
)
347+
348+
assert.equal(
349+
toText(h('p', [h('span', 'A\n'), h('span', 'b')])),
350+
'A b',
351+
'should support line endings around element breaks (5)'
352+
)
353+
354+
assert.equal(
355+
toText(h('p', [h('span', 'A'), h('span', '\nb')])),
356+
'A b',
357+
'should support line endings around element breaks (6)'
358+
)
359+
360+
assert.equal(
361+
toText(h('p', [h('span', 'A\n'), h('span', '\nb')])),
362+
'A b',
363+
'should support line endings around element breaks (7)'
364+
)
365+
366+
assert.equal(
367+
toText(h('p', [h('span', 'A\n'), 'b'])),
368+
'A b',
369+
'should support line endings around element breaks (8)'
370+
)
371+
372+
assert.equal(
373+
toText(h('p', [h('span', 'A'), '\nb'])),
374+
'A b',
375+
'should support line endings around element breaks (9)'
376+
)
377+
378+
assert.equal(
379+
toText(h('p', [h('span', 'A\n'), '\nb'])),
380+
'A b',
381+
'should support line endings around element breaks (10)'
382+
)
383+
384+
assert.equal(
385+
toText(h('div', [h('p', [h('span', 'A\n'), '\nb'])])),
386+
'A b',
387+
'should support line endings around element breaks (11)'
388+
)
389+
390+
assert.equal(
391+
toText(h('pre', ['A\n', h('span', 'b')])),
392+
'A\nb',
393+
'should support line endings around element breaks (12)'
394+
)
395+
})

0 commit comments

Comments
 (0)