@@ -132,7 +132,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
132
132
appendBeforeTokens ( firstToken)
133
133
}
134
134
135
- appendToken ( . verbatim( Verbatim ( text: node. description) ) )
135
+ appendToken ( . verbatim( Verbatim ( text: node. description, indentingBehavior : . allLines ) ) )
136
136
137
137
if let lastToken = node. lastToken {
138
138
// Extract any comments that trail the verbatim block since they belong to the next syntax
@@ -1978,6 +1978,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
1978
1978
appendToken ( . syntax( token. text) )
1979
1979
}
1980
1980
1981
+ appendTrailingTrivia ( token)
1981
1982
appendAfterTokensAndTrailingComments ( token)
1982
1983
1983
1984
// It doesn't matter what we return here, tokens do not have children.
@@ -2033,6 +2034,43 @@ private final class TokenStreamCreator: SyntaxVisitor {
2033
2034
}
2034
2035
}
2035
2036
2037
/// Handles trailing trivia that might contain garbage text that we don't want to
/// indiscriminately discard.
///
/// In syntactically valid code, trailing trivia will only contain spaces or tabs, so we can
/// usually ignore it entirely. If there is garbage text after a token, however, then we preserve
/// it (and any whitespace immediately before it) and "glue" it to the end of the preceding token
/// using a `verbatim` formatting token. Any whitespace following the last garbage text in the
/// trailing trivia will be discarded, with the assumption that the formatter will have inserted
/// some kind of break there that would be more appropriate (and we want to avoid inserting
/// trailing whitespace on a line).
///
/// The choices above are admittedly somewhat arbitrary, but given that garbage text in trailing
/// trivia represents a malformed input (as opposed to garbage text in leading trivia, which has
/// some legitimate uses), this is a reasonable compromise to keep the garbage text roughly in the
/// same place but still let surrounding formatting occur somewhat as expected.
///
/// - Parameter token: The syntax token whose trailing trivia should be inspected.
private func appendTrailingTrivia(_ token: TokenSyntax) {
  let trailingTrivia = Array(token.trailingTrivia)

  // Nothing is emitted unless at least one garbage-text piece is present.
  guard let lastGarbageIndex = trailingTrivia.lastIndex(where: { $0.isGarbageText }) else {
    return
  }

  // Start from an empty string so that the verbatim token contains exactly what the trivia
  // contained; seeding it with a space would insert whitespace that was never in the input.
  var verbatimText = ""

  // Only the pieces up to and including the last garbage text are kept; anything after it is
  // dropped.
  for piece in trailingTrivia[...lastGarbageIndex] {
    switch piece {
    case .garbageText, .spaces, .tabs, .formfeeds, .verticalTabs:
      piece.write(to: &verbatimText)
    default:
      // The implementation of the lexer today ensures that newlines, carriage returns, and
      // comments will not be present in trailing trivia. Ignore them for now (rather than assert,
      // in case that changes in a future version).
      break
    }
  }

  appendToken(.verbatim(Verbatim(text: verbatimText, indentingBehavior: .none)))
}
2073
+
2036
2074
/// Appends the after-tokens and trailing comments (if present) of the given syntax token
2037
2075
/// to the token stream.
2038
2076
///
@@ -2399,7 +2437,11 @@ private final class TokenStreamCreator: SyntaxVisitor {
2399
2437
}
2400
2438
}
2401
2439
2402
- var lastPieceWasLineComment = false
2440
+ // Updated throughout the loop to indicate whether the next newline *must* be honored (for
2441
+ // example, even if discretionary newlines are discarded). This is the case when the preceding
2442
+ // trivia was a line comment or garbage text.
2443
+ var requiresNextNewline = false
2444
+
2403
2445
for (index, piece) in trivia. enumerated ( ) {
2404
2446
if let cutoff = cutoffIndex, index == cutoff { break }
2405
2447
switch piece {
@@ -2409,7 +2451,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
2409
2451
appendNewlines ( . soft)
2410
2452
isStartOfFile = false
2411
2453
}
2412
- lastPieceWasLineComment = true
2454
+ requiresNextNewline = true
2413
2455
2414
2456
case . blockComment( let text) :
2415
2457
if index > 0 || isStartOfFile {
@@ -2420,39 +2462,51 @@ private final class TokenStreamCreator: SyntaxVisitor {
2420
2462
appendToken ( . break( . same, size: 0 ) )
2421
2463
isStartOfFile = false
2422
2464
}
2423
- lastPieceWasLineComment = false
2465
+ requiresNextNewline = false
2424
2466
2425
2467
case . docLineComment( let text) :
2426
2468
appendToken ( . comment( Comment ( kind: . docLine, text: text) , wasEndOfLine: false ) )
2427
2469
appendNewlines ( . soft)
2428
2470
isStartOfFile = false
2429
- lastPieceWasLineComment = true
2471
+ requiresNextNewline = true
2430
2472
2431
2473
case . docBlockComment( let text) :
2432
2474
appendToken ( . comment( Comment ( kind: . docBlock, text: text) , wasEndOfLine: false ) )
2433
2475
appendNewlines ( . soft)
2434
2476
isStartOfFile = false
2435
- lastPieceWasLineComment = false
2477
+ requiresNextNewline = false
2436
2478
2437
2479
case . newlines( let count) , . carriageReturns( let count) , . carriageReturnLineFeeds( let count) :
2438
2480
guard !isStartOfFile else { break }
2439
- // Even if we aren't respecting discretionary newlines, there must always be a newline after
2440
- // a line comment.
2441
- if lastPieceWasLineComment ||
2481
+
2482
+ if requiresNextNewline ||
2442
2483
( config. respectsExistingLineBreaks && isDiscretionaryNewlineAllowed ( before: token) )
2443
2484
{
2444
2485
appendNewlines ( . soft( count: count, discretionary: true ) )
2445
2486
} else {
2446
2487
// Even if discretionary line breaks are not being respected, we still respect multiple
2447
2488
// line breaks in order to keep blank separator lines that the user might want.
2448
2489
// TODO: It would be nice to restrict this to only allow multiple lines between statements
2449
- // and declarations; as currently implemented, multiple newlines will locally the
2490
+ // and declarations; as currently implemented, multiple newlines will locally ignore the
2450
2491
// configuration setting.
2451
2492
if count > 1 {
2452
2493
appendNewlines ( . soft( count: count, discretionary: true ) )
2453
2494
}
2454
2495
}
2455
2496
2497
+ case . garbageText( let text) :
2498
+ // Garbage text in leading trivia might be something meaningful that would be disruptive to
2499
+ // throw away when formatting the file, like a hashbang line or Unicode byte-order marker at
2500
+ // the beginning of a file, or source control conflict markers. Keep it as verbatim text so
2501
+ // that it is printed exactly as we got it.
2502
+ appendToken ( . verbatim( Verbatim ( text: text, indentingBehavior: . none) ) )
2503
+
2504
+ // Unicode byte-order markers shouldn't allow leading newlines to otherwise appear in the
2505
+ // file, nor should they modify our detection of the beginning of the file.
2506
+ let isBOM = text == " \u{feff} "
2507
+ requiresNextNewline = !isBOM
2508
+ isStartOfFile = isStartOfFile && isBOM
2509
+
2456
2510
default :
2457
2511
break
2458
2512
}
@@ -2958,6 +3012,16 @@ extension Collection {
2958
3012
}
2959
3013
}
2960
3014
3015
extension TriviaPiece {
  /// Whether this trivia piece is a `.garbageText` piece; `false` for every other kind of
  /// trivia.
  fileprivate var isGarbageText: Bool {
    if case .garbageText = self {
      return true
    }
    return false
  }
}
3024
+
2961
3025
/// Returns whether the given trivia includes a directive to ignore formatting for the next node.
2962
3026
///
2963
3027
/// - Parameter trivia: Leading trivia for a node that the formatter supports ignoring.
0 commit comments