@@ -132,7 +132,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
132
132
appendBeforeTokens ( firstToken)
133
133
}
134
134
135
- appendToken ( . verbatim( Verbatim ( text: node. description) ) )
135
+ appendToken ( . verbatim( Verbatim ( text: node. description, indentingBehavior : . allLines ) ) )
136
136
137
137
if let lastToken = node. lastToken {
138
138
// Extract any comments that trail the verbatim block since they belong to the next syntax
@@ -1978,6 +1978,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
1978
1978
appendToken ( . syntax( token. text) )
1979
1979
}
1980
1980
1981
+ appendTrailingTrivia ( token)
1981
1982
appendAfterTokensAndTrailingComments ( token)
1982
1983
1983
1984
// It doesn't matter what we return here, tokens do not have children.
@@ -2033,6 +2034,43 @@ private final class TokenStreamCreator: SyntaxVisitor {
2033
2034
}
2034
2035
}
2035
2036
2037
/// Preserves garbage text found in a token's trailing trivia instead of indiscriminately
/// discarding it.
///
/// In syntactically valid code, trailing trivia will only contain spaces or tabs, so it can
/// usually be ignored entirely. If there is garbage text after a token, however, it is kept
/// (along with any whitespace immediately before it) and "glued" to the end of the preceding
/// token using a `verbatim` formatting token. Any whitespace following the last garbage text in
/// the trailing trivia is discarded, on the assumption that the formatter will have inserted some
/// kind of break there that is more appropriate (and to avoid emitting trailing whitespace on a
/// line).
///
/// These choices are admittedly somewhat arbitrary, but garbage text in trailing trivia
/// represents malformed input (as opposed to garbage text in leading trivia, which has some
/// legitimate uses), so this is a reasonable compromise: the garbage text stays roughly where it
/// was while the surrounding formatting still happens mostly as expected.
///
/// - Parameter token: The token whose trailing trivia is inspected. Nothing is appended to the
///   token stream when that trivia contains no garbage text.
private func appendTrailingTrivia(_ token: TokenSyntax) {
  let trailingTrivia = Array(token.trailingTrivia)
  guard let lastGarbageIndex = trailingTrivia.lastIndex(where: { $0.isGarbageText }) else {
    // No garbage text at all: the trailing trivia is dropped, as it always was.
    return
  }

  // Start from an empty string so that the only whitespace emitted is whitespace that was
  // actually present in the trivia ahead of the garbage text.
  var verbatimText = ""

  // Only walk up to (and including) the last garbage piece; anything after it is trailing
  // whitespace that is intentionally discarded.
  for piece in trailingTrivia[...lastGarbageIndex] {
    switch piece {
    case .garbageText, .spaces, .tabs, .formfeeds, .verticalTabs:
      piece.write(to: &verbatimText)
    default:
      // The implementation of the lexer today ensures that newlines, carriage returns, and
      // comments will not be present in trailing trivia. Ignore them for now (rather than
      // assert, in case that changes in a future version).
      break
    }
  }

  // `.none` indentation keeps the text attached to the preceding token exactly as collected.
  appendToken(.verbatim(Verbatim(text: verbatimText, indentingBehavior: .none)))
}
2073
+
2036
2074
/// Appends the after-tokens and trailing comments (if present) of the given syntax token
2037
2075
/// to the token stream.
2038
2076
///
@@ -2397,7 +2435,11 @@ private final class TokenStreamCreator: SyntaxVisitor {
2397
2435
}
2398
2436
}
2399
2437
2400
- var lastPieceWasLineComment = false
2438
+ // Updated throughout the loop to indicate whether the next newline *must* be honored (for
2439
+ // example, even if discretionary newlines are discarded). This is the case when the preceding
2440
+ // trivia was a line comment or garbage text.
2441
+ var requiresNextNewline = false
2442
+
2401
2443
for (index, piece) in trivia. enumerated ( ) {
2402
2444
if let cutoff = cutoffIndex, index == cutoff { break }
2403
2445
switch piece {
@@ -2407,7 +2449,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
2407
2449
appendNewlines ( . soft)
2408
2450
isStartOfFile = false
2409
2451
}
2410
- lastPieceWasLineComment = true
2452
+ requiresNextNewline = true
2411
2453
2412
2454
case . blockComment( let text) :
2413
2455
if index > 0 || isStartOfFile {
@@ -2418,39 +2460,51 @@ private final class TokenStreamCreator: SyntaxVisitor {
2418
2460
appendToken ( . break( . same, size: 0 ) )
2419
2461
isStartOfFile = false
2420
2462
}
2421
- lastPieceWasLineComment = false
2463
+ requiresNextNewline = false
2422
2464
2423
2465
case . docLineComment( let text) :
2424
2466
appendToken ( . comment( Comment ( kind: . docLine, text: text) , wasEndOfLine: false ) )
2425
2467
appendNewlines ( . soft)
2426
2468
isStartOfFile = false
2427
- lastPieceWasLineComment = true
2469
+ requiresNextNewline = true
2428
2470
2429
2471
case . docBlockComment( let text) :
2430
2472
appendToken ( . comment( Comment ( kind: . docBlock, text: text) , wasEndOfLine: false ) )
2431
2473
appendNewlines ( . soft)
2432
2474
isStartOfFile = false
2433
- lastPieceWasLineComment = false
2475
+ requiresNextNewline = false
2434
2476
2435
2477
case . newlines( let count) , . carriageReturns( let count) , . carriageReturnLineFeeds( let count) :
2436
2478
guard !isStartOfFile else { break }
2437
- // Even if we aren't respecting discretionary newlines, there must always be a newline after
2438
- // a line comment.
2439
- if lastPieceWasLineComment ||
2479
+
2480
+ if requiresNextNewline ||
2440
2481
( config. respectsExistingLineBreaks && isDiscretionaryNewlineAllowed ( before: token) )
2441
2482
{
2442
2483
appendNewlines ( . soft( count: count, discretionary: true ) )
2443
2484
} else {
2444
2485
// Even if discretionary line breaks are not being respected, we still respect multiple
2445
2486
// line breaks in order to keep blank separator lines that the user might want.
2446
2487
// TODO: It would be nice to restrict this to only allow multiple lines between statements
2447
- // and declarations; as currently implemented, multiple newlines will locally the
2488
+ // and declarations; as currently implemented, multiple newlines will locally ignore the
2448
2489
// configuration setting.
2449
2490
if count > 1 {
2450
2491
appendNewlines ( . soft( count: count, discretionary: true ) )
2451
2492
}
2452
2493
}
2453
2494
2495
+ case . garbageText( let text) :
2496
+ // Garbage text in leading trivia might be something meaningful that would be disruptive to
2497
+ // throw away when formatting the file, like a hashbang line or Unicode byte-order marker at
2498
+ // the beginning of a file, or source control conflict markers. Keep it as verbatim text so
2499
+ // that it is printed exactly as we got it.
2500
+ appendToken ( . verbatim( Verbatim ( text: text, indentingBehavior: . none) ) )
2501
+
2502
+ // Unicode byte-order markers shouldn't allow leading newlines to otherwise appear in the
2503
+ // file, nor should they modify our detection of the beginning of the file.
2504
+ let isBOM = text == " \u{feff} "
2505
+ requiresNextNewline = !isBOM
2506
+ isStartOfFile = isStartOfFile && isBOM
2507
+
2454
2508
default :
2455
2509
break
2456
2510
}
@@ -2956,6 +3010,16 @@ extension Collection {
2956
3010
}
2957
3011
}
2958
3012
3013
extension TriviaPiece {
  /// Whether this piece is a `garbageText` piece; every other kind of trivia yields `false`.
  fileprivate var isGarbageText: Bool {
    if case .garbageText = self {
      return true
    }
    return false
  }
}
3022
+
2959
3023
/// Returns whether the given trivia includes a directive to ignore formatting for the next node.
2960
3024
///
2961
3025
/// - Parameter trivia: Leading trivia for a node that the formatter supports ignoring.
0 commit comments