Skip to content

Commit e45fea4

Browse files
authored
Fix inlines; trim before <br> elements
Closes GH-1.
1 parent a07551f commit e45fea4

File tree

2 files changed

+112
-46
lines changed

2 files changed

+112
-46
lines changed

index.js

Lines changed: 94 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ var spaceChar = ' '
4141
function toText(node) {
4242
var children = node.children || []
4343
var length = children.length
44+
var block = blockOrCaption(node)
45+
var whiteSpace = inferWhiteSpace(node, {})
4446
var index = -1
45-
var options = {whiteSpace: inferWhiteSpace(node, {})}
4647
var results
4748
var current
4849
var result
@@ -58,7 +59,11 @@ function toText(node) {
5859
// Nodes without children are treated as a void element, so `doctype` is thus
5960
// ignored.
6061
if (node.type === 'text' || node.type === 'comment') {
61-
return collectText(node)
62+
return collectText(node, {
63+
whiteSpace: whiteSpace,
64+
breakBefore: true,
65+
breakAfter: true
66+
})
6267
}
6368

6469
// 1. If this element is not being rendered, or if the user agent is a
@@ -80,7 +85,11 @@ function toText(node) {
8085
// collection steps with node.
8186
// Each item in results will either be a JavaScript string or a
8287
// positive integer (a required line break count).
83-
current = innerTextCollection(children[index], index, node, options)
88+
current = innerTextCollection(children[index], index, node, {
89+
whiteSpace: whiteSpace,
90+
breakBefore: index === 0 ? block : false,
91+
breakAfter: index === length - 1 ? block : is(children[index + 1], 'br')
92+
})
8493

8594
// 3.2. For each item item in current, append item to results.
8695
results = results.concat(current)
@@ -126,7 +135,9 @@ function innerTextCollection(node, index, parent, options) {
126135

127136
if (node.type === 'text') {
128137
return [
129-
options.whiteSpace === 'normal' ? collectText(node) : collectPreText(node)
138+
options.whiteSpace === 'normal'
139+
? collectText(node, options)
140+
: collectPreText(node, options)
130141
]
131142
}
132143

@@ -136,26 +147,23 @@ function innerTextCollection(node, index, parent, options) {
136147
// Collect an element.
137148
function collectElement(node, index, parent, options) {
138149
// First we infer the `white-space` property.
139-
var settings = {whiteSpace: inferWhiteSpace(node, options)}
150+
var whiteSpace = inferWhiteSpace(node, options)
140151
var children = node.children || []
141152
var length = children.length
142153
var offset = -1
143154
var items = []
155+
var current
156+
var prefix
157+
var suffix
144158

145159
// We’re ignoring point 3, and exiting without any content here, because we
146160
// deviated from the spec in `toText` at step 3.
147161
if (notRendered(node)) {
148162
return items
149163
}
150164

151-
// 1. Let items be the result of running the inner text collection steps with
152-
// each child node of node in tree order, and then concatenating the
153-
// results to a single list.
154-
while (++offset < length) {
155-
items = items.concat(
156-
innerTextCollection(children[offset], offset, node, settings)
157-
)
158-
}
165+
// Note: we first detect if there is going to be a break before or after the
166+
// contents, as that changes the white-space handling.
159167

160168
// 2. If node’s computed value of `visibility` is not `visible`, then return
161169
// items.
@@ -166,36 +174,12 @@ function collectElement(node, index, parent, options) {
166174
//
167175
// Note: We already did this above.
168176

169-
// 4. If node is a Text node, then for each CSS text box produced by node,
170-
// in content order, compute the text of the box after application of the
171-
// CSS `white-space` processing rules and `text-transform` rules, set
172-
// items to the list of the resulting strings, and return items.
173-
// The CSS `white-space` processing rules are slightly modified:
174-
// collapsible spaces at the end of lines are always collapsed, but they
175-
// are only removed if the line is the last line of the block, or it ends
176-
// with a br element.
177-
// Soft hyphens should be preserved.
178-
//
179-
// Note: See `collectText` and `collectPreText`.
180-
// Note: we don’t deal with `text-transform`, no element has that by
181-
// default.
182-
// Note: I don’t understand the last line, as we’re dealing with text
183-
// here, there’s no `<br>` elements.
177+
// See `collectText` for step 4.
184178

185179
// 5. If node is a `<br>` element, then append a string containing a single
186180
// U+000A LINE FEED (LF) character to items.
187181
if (is(node, 'br')) {
188-
items.push(lineFeedChar)
189-
}
190-
191-
// 6. If node’s computed value of `display` is `table-cell`, and node’s CSS
192-
// box is not the last `table-cell` box of its enclosing `table-row` box,
193-
// then append a string containing a single U+0009 CHARACTER TABULATION
194-
// (tab) character to items.
195-
//
196-
// See: <https://html.spec.whatwg.org/#tables-2>
197-
else if (cell(node) && findAfter(parent, node, cell)) {
198-
items.push(tabChar)
182+
suffix = lineFeedChar
199183
}
200184

201185
// 7. If node’s computed value of `display` is `table-row`, and node’s CSS
@@ -207,27 +191,78 @@ function collectElement(node, index, parent, options) {
207191
// Note: needs further investigation as this does not account for implicit
208192
// rows.
209193
else if (row(node) && findAfter(parent, node, row)) {
210-
items.push(lineFeedChar)
194+
suffix = lineFeedChar
211195
}
212196

213197
// 8. If node is a `<p>` element, then append 2 (a required line break count)
214198
// at the beginning and end of items.
215199
else if (is(node, 'p')) {
216-
items = [2].concat(items, 2)
200+
prefix = 2
201+
suffix = 2
217202
}
218203

219204
// 9. If node’s used value of `display` is block-level or `table-caption`,
220205
// then append 1 (a required line break count) at the beginning and end of
221206
// items.
222207
else if (blockOrCaption(node)) {
223-
items = [1].concat(items, 1)
208+
prefix = 1
209+
suffix = 1
210+
}
211+
212+
// 1. Let items be the result of running the inner text collection steps with
213+
// each child node of node in tree order, and then concatenating the
214+
// results to a single list.
215+
while (++offset < length) {
216+
current = innerTextCollection(children[offset], offset, node, {
217+
whiteSpace: whiteSpace,
218+
breakBefore: offset === 0 ? prefix : false,
219+
breakAfter:
220+
offset === length - 1 ? suffix : is(children[offset + 1], 'br')
221+
})
222+
223+
items = items.concat(current)
224+
}
225+
226+
// 6. If node’s computed value of `display` is `table-cell`, and node’s CSS
227+
// box is not the last `table-cell` box of its enclosing `table-row` box,
228+
// then append a string containing a single U+0009 CHARACTER TABULATION
229+
// (tab) character to items.
230+
//
231+
// See: <https://html.spec.whatwg.org/#tables-2>
232+
if (cell(node) && findAfter(parent, node, cell)) {
233+
items.push(tabChar)
234+
}
235+
236+
// Add the prefix and the suffix, if any.
237+
if (prefix) {
238+
items.unshift(prefix)
239+
}
240+
241+
if (suffix) {
242+
items.push(suffix)
224243
}
225244

226245
return items
227246
}
228247

248+
// 4. If node is a Text node, then for each CSS text box produced by node,
249+
// in content order, compute the text of the box after application of the
250+
// CSS `white-space` processing rules and `text-transform` rules, set
251+
// items to the list of the resulting strings, and return items.
252+
// The CSS `white-space` processing rules are slightly modified:
253+
// collapsible spaces at the end of lines are always collapsed, but they
254+
// are only removed if the line is the last line of the block, or it ends
255+
// with a br element.
256+
// Soft hyphens should be preserved.
257+
//
258+
// Note: this step is implemented here (`collectText`) and in `collectPreText`.
259+
// Note: we don’t deal with `text-transform`, no element has that by
260+
// default.
261+
//
229262
// See: <https://drafts.csswg.org/css-text/#white-space-phase-1>
230-
function collectText(node) {
263+
function collectText(node, options) {
264+
var breakBefore = options.breakBefore
265+
var breakAfter = options.breakAfter
231266
var value = String(node.value)
232267
var index = -1
233268
var length = value.length
@@ -252,7 +287,7 @@ function collectText(node) {
252287

253288
// Any sequence of collapsible spaces and tabs immediately preceding or
254289
// following a segment break is removed.
255-
line = trimAndcollapseSpacesAndTabs(line)
290+
line = trimAndcollapseSpacesAndTabs(line, breakBefore, breakAfter)
256291

257292
// Add the line.
258293
lines.push(line)
@@ -348,14 +383,14 @@ function removeBidiControlCharacters(value) {
348383
// space, provided both spaces are within the same inline formatting
349384
// context—is collapsed to have zero advance width. (It is invisible,
350385
// but retains its soft wrap opportunity, if any.)
351-
function trimAndcollapseSpacesAndTabs(value) {
386+
function trimAndcollapseSpacesAndTabs(value, breakBefore, breakAfter) {
352387
var start = 0
353388
var end
354389
var length = value.length
355390
var result = []
356391
var char
357392

358-
// Move forward pas initial white space.
393+
// Move forward past initial white space.
359394
while (start <= length) {
360395
char = value.charCodeAt(start)
361396

@@ -366,6 +401,12 @@ function trimAndcollapseSpacesAndTabs(value) {
366401
start++
367402
}
368403

404+
// If we’re not directly after a segment break, but there was white space,
405+
// add an empty value that will be turned into a space.
406+
if (start !== 0 && !breakBefore) {
407+
result.push('')
408+
}
409+
369410
end = next(start - 1)
370411

371412
while (start < length) {
@@ -383,6 +424,13 @@ function trimAndcollapseSpacesAndTabs(value) {
383424
start++
384425
}
385426

427+
// If we reached the end, there was trailing white space, and there’s no
428+
// segment break after this node, add an empty value that will be turned
429+
// into a space.
430+
if (start === length && start !== end && !breakAfter) {
431+
result.push('')
432+
}
433+
386434
end = next(start)
387435
}
388436

test.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,12 @@ test('hast-util-to-text', function(t) {
138138
'should ignore closed dialogs'
139139
)
140140

141+
t.equal(
142+
toText(h('p', ['Zulu\t', h('span', 'zulu'), ' \t zulu.'])),
143+
'Zulu zulu zulu.',
144+
'should support white-space around elements'
145+
)
146+
141147
t.test('normal white-space', function(st) {
142148
st.equal(
143149
toText(h('p', 'Alpha bravo charlie.')),
@@ -225,6 +231,18 @@ test('hast-util-to-text', function(t) {
225231
'should not collapse line feeds to a space if they’re surrounded by a zero width space'
226232
)
227233

234+
t.equal(
235+
toText(h('div', h('p', ['Delta. ', h('br')]))),
236+
'Delta.\n',
237+
'should support trim white-space before a `<br>` (#1)'
238+
)
239+
240+
t.equal(
241+
toText(h('p', ['Delta. ', h('br')])),
242+
'Delta.\n',
243+
'should support trim white-space before a `<br>` (#2)'
244+
)
245+
228246
st.end()
229247
})
230248

0 commit comments

Comments
 (0)