@@ -41,8 +41,9 @@ var spaceChar = ' '
41
41
function toText ( node ) {
42
42
var children = node . children || [ ]
43
43
var length = children . length
44
+ var block = blockOrCaption ( node )
45
+ var whiteSpace = inferWhiteSpace ( node , { } )
44
46
var index = - 1
45
- var options = { whiteSpace : inferWhiteSpace ( node , { } ) }
46
47
var results
47
48
var current
48
49
var result
@@ -58,7 +59,11 @@ function toText(node) {
58
59
// Nodes without children are treated as void elements, so `doctype` is
59
60
// ignored.
60
61
if ( node . type === 'text' || node . type === 'comment' ) {
61
- return collectText ( node )
62
+ return collectText ( node , {
63
+ whiteSpace : whiteSpace ,
64
+ breakBefore : true ,
65
+ breakAfter : true
66
+ } )
62
67
}
63
68
64
69
// 1. If this element is not being rendered, or if the user agent is a
@@ -80,7 +85,11 @@ function toText(node) {
80
85
// collection steps with node.
81
86
// Each item in results will either be a JavaScript string or a
82
87
// positive integer (a required line break count).
83
- current = innerTextCollection ( children [ index ] , index , node , options )
88
+ current = innerTextCollection ( children [ index ] , index , node , {
89
+ whiteSpace : whiteSpace ,
90
+ breakBefore : index === 0 ? block : false ,
91
+ breakAfter : index === length - 1 ? block : is ( children [ index + 1 ] , 'br' )
92
+ } )
84
93
85
94
// 3.2. For each item item in current, append item to results.
86
95
results = results . concat ( current )
@@ -126,7 +135,9 @@ function innerTextCollection(node, index, parent, options) {
126
135
127
136
if ( node . type === 'text' ) {
128
137
return [
129
- options . whiteSpace === 'normal' ? collectText ( node ) : collectPreText ( node )
138
+ options . whiteSpace === 'normal'
139
+ ? collectText ( node , options )
140
+ : collectPreText ( node , options )
130
141
]
131
142
}
132
143
@@ -136,26 +147,23 @@ function innerTextCollection(node, index, parent, options) {
136
147
// Collect an element.
137
148
function collectElement ( node , index , parent , options ) {
138
149
// First we infer the `white-space` property.
139
- var settings = { whiteSpace : inferWhiteSpace ( node , options ) }
150
+ var whiteSpace = inferWhiteSpace ( node , options )
140
151
var children = node . children || [ ]
141
152
var length = children . length
142
153
var offset = - 1
143
154
var items = [ ]
155
+ var current
156
+ var prefix
157
+ var suffix
144
158
145
159
// We’re ignoring point 3, and exiting without any content here, because we
146
160
// deviated from the spec in `toText` at step 3.
147
161
if ( notRendered ( node ) ) {
148
162
return items
149
163
}
150
164
151
- // 1. Let items be the result of running the inner text collection steps with
152
- // each child node of node in tree order, and then concatenating the
153
- // results to a single list.
154
- while ( ++ offset < length ) {
155
- items = items . concat (
156
- innerTextCollection ( children [ offset ] , offset , node , settings )
157
- )
158
- }
165
+ // Note: we first detect if there is going to be a break before or after the
166
+ // contents, as that changes the white-space handling.
159
167
160
168
// 2. If node’s computed value of `visibility` is not `visible`, then return
161
169
// items.
@@ -166,36 +174,12 @@ function collectElement(node, index, parent, options) {
166
174
//
167
175
// Note: We already did this above.
168
176
169
- // 4. If node is a Text node, then for each CSS text box produced by node,
170
- // in content order, compute the text of the box after application of the
171
- // CSS `white-space` processing rules and `text-transform` rules, set
172
- // items to the list of the resulting strings, and return items.
173
- // The CSS `white-space` processing rules are slightly modified:
174
- // collapsible spaces at the end of lines are always collapsed, but they
175
- // are only removed if the line is the last line of the block, or it ends
176
- // with a br element.
177
- // Soft hyphens should be preserved.
178
- //
179
- // Note: See `collectText` and `collectPreText`.
180
- // Note: we don’t deal with `text-transform`, no element has that by
181
- // default.
182
- // Note: I don’t understand the last line, as we’re dealing with text
183
- // here, there’s no `<br>` elements.
177
+ // See `collectText` for step 4.
184
178
185
179
// 5. If node is a `<br>` element, then append a string containing a single
186
180
// U+000A LINE FEED (LF) character to items.
187
181
if ( is ( node , 'br' ) ) {
188
- items . push ( lineFeedChar )
189
- }
190
-
191
- // 6. If node’s computed value of `display` is `table-cell`, and node’s CSS
192
- // box is not the last `table-cell` box of its enclosing `table-row` box,
193
- // then append a string containing a single U+0009 CHARACTER TABULATION
194
- // (tab) character to items.
195
- //
196
- // See: <https://html.spec.whatwg.org/#tables-2>
197
- else if ( cell ( node ) && findAfter ( parent , node , cell ) ) {
198
- items . push ( tabChar )
182
+ suffix = lineFeedChar
199
183
}
200
184
201
185
// 7. If node’s computed value of `display` is `table-row`, and node’s CSS
@@ -207,27 +191,78 @@ function collectElement(node, index, parent, options) {
207
191
// Note: needs further investigation as this does not account for implicit
208
192
// rows.
209
193
else if ( row ( node ) && findAfter ( parent , node , row ) ) {
210
- items . push ( lineFeedChar )
194
+ suffix = lineFeedChar
211
195
}
212
196
213
197
// 8. If node is a `<p>` element, then append 2 (a required line break count)
214
198
// at the beginning and end of items.
215
199
else if ( is ( node , 'p' ) ) {
216
- items = [ 2 ] . concat ( items , 2 )
200
+ prefix = 2
201
+ suffix = 2
217
202
}
218
203
219
204
// 9. If node’s used value of `display` is block-level or `table-caption`,
220
205
// then append 1 (a required line break count) at the beginning and end of
221
206
// items.
222
207
else if ( blockOrCaption ( node ) ) {
223
- items = [ 1 ] . concat ( items , 1 )
208
+ prefix = 1
209
+ suffix = 1
210
+ }
211
+
212
+ // 1. Let items be the result of running the inner text collection steps with
213
+ // each child node of node in tree order, and then concatenating the
214
+ // results to a single list.
215
+ while ( ++ offset < length ) {
216
+ current = innerTextCollection ( children [ offset ] , offset , node , {
217
+ whiteSpace : whiteSpace ,
218
+ breakBefore : offset === 0 ? prefix : false ,
219
+ breakAfter :
220
+ offset === length - 1 ? suffix : is ( children [ offset + 1 ] , 'br' )
221
+ } )
222
+
223
+ items = items . concat ( current )
224
+ }
225
+
226
+ // 6. If node’s computed value of `display` is `table-cell`, and node’s CSS
227
+ // box is not the last `table-cell` box of its enclosing `table-row` box,
228
+ // then append a string containing a single U+0009 CHARACTER TABULATION
229
+ // (tab) character to items.
230
+ //
231
+ // See: <https://html.spec.whatwg.org/#tables-2>
232
+ if ( cell ( node ) && findAfter ( parent , node , cell ) ) {
233
+ items . push ( tabChar )
234
+ }
235
+
236
+ // Add the prefix and suffix.
237
+ if ( prefix ) {
238
+ items . unshift ( prefix )
239
+ }
240
+
241
+ if ( suffix ) {
242
+ items . push ( suffix )
224
243
}
225
244
226
245
return items
227
246
}
228
247
248
+ // 4. If node is a Text node, then for each CSS text box produced by node,
249
+ // in content order, compute the text of the box after application of the
250
+ // CSS `white-space` processing rules and `text-transform` rules, set
251
+ // items to the list of the resulting strings, and return items.
252
+ // The CSS `white-space` processing rules are slightly modified:
253
+ // collapsible spaces at the end of lines are always collapsed, but they
254
+ // are only removed if the line is the last line of the block, or it ends
255
+ // with a br element.
256
+ // Soft hyphens should be preserved.
257
+ //
258
+ // Note: See `collectText` and `collectPreText`.
259
+ // Note: we don’t deal with `text-transform`, no element has that by
260
+ // default.
261
+ //
229
262
// See: <https://drafts.csswg.org/css-text/#white-space-phase-1>
230
- function collectText ( node ) {
263
+ function collectText ( node , options ) {
264
+ var breakBefore = options . breakBefore
265
+ var breakAfter = options . breakAfter
231
266
var value = String ( node . value )
232
267
var index = - 1
233
268
var length = value . length
@@ -252,7 +287,7 @@ function collectText(node) {
252
287
253
288
// Any sequence of collapsible spaces and tabs immediately preceding or
254
289
// following a segment break is removed.
255
- line = trimAndcollapseSpacesAndTabs ( line )
290
+ line = trimAndcollapseSpacesAndTabs ( line , breakBefore , breakAfter )
256
291
257
292
// Add the line.
258
293
lines . push ( line )
@@ -348,14 +383,14 @@ function removeBidiControlCharacters(value) {
348
383
// space, provided both spaces are within the same inline formatting
349
384
// context—is collapsed to have zero advance width. (It is invisible,
350
385
// but retains its soft wrap opportunity, if any.)
351
- function trimAndcollapseSpacesAndTabs ( value ) {
386
+ function trimAndcollapseSpacesAndTabs ( value , breakBefore , breakAfter ) {
352
387
var start = 0
353
388
var end
354
389
var length = value . length
355
390
var result = [ ]
356
391
var char
357
392
358
- // Move forward pas initial white space.
393
+ // Move forward past initial white space.
359
394
while ( start <= length ) {
360
395
char = value . charCodeAt ( start )
361
396
@@ -366,6 +401,12 @@ function trimAndcollapseSpacesAndTabs(value) {
366
401
start ++
367
402
}
368
403
404
+ // If we’re not directly after a segment break, but there was white space,
405
+ // add an empty value that will be turned into a space.
406
+ if ( start !== 0 && ! breakBefore ) {
407
+ result . push ( '' )
408
+ }
409
+
369
410
end = next ( start - 1 )
370
411
371
412
while ( start < length ) {
@@ -383,6 +424,13 @@ function trimAndcollapseSpacesAndTabs(value) {
383
424
start ++
384
425
}
385
426
427
+ // If we reached the end, there was trailing white space, and there’s no
428
+ // segment break after this node, add an empty value that will be turned
429
+ // into a space.
430
+ if ( start === length && start !== end && ! breakAfter ) {
431
+ result . push ( '' )
432
+ }
433
+
386
434
end = next ( start )
387
435
}
388
436
0 commit comments