Skip to content

Commit 63366a8

Browse files
authored
Merge pull request swiftlang#127 from allevato/garbage-correction
Preserve garbage text trivia when pretty printing.
2 parents c79935d + 9107050 commit 63366a8

File tree

6 files changed

+302
-47
lines changed

6 files changed

+302
-47
lines changed

Sources/SwiftFormatPrettyPrint/PrettyPrint.swift

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -554,14 +554,7 @@ public class PrettyPrinter {
554554
total += wasEndOfLine ? 0 : comment.length
555555

556556
case .verbatim(let verbatim):
557-
var length: Int
558-
if verbatim.lines.count > 1 {
559-
length = maxLineLength
560-
} else if verbatim.lines.count == 0 {
561-
length = 0
562-
} else {
563-
length = verbatim.lines[0].count
564-
}
557+
let length = verbatim.prettyPrintingLength(maximum: maxLineLength)
565558
lengths.append(length)
566559
total += length
567560

Sources/SwiftFormatPrettyPrint/Token.swift

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,4 @@ enum Token {
201201
static func `break`(_ kind: BreakKind, newlines: NewlineBehavior) -> Token {
202202
return .break(kind, size: 1, newlines: newlines)
203203
}
204-
205-
static func verbatim(text: String) -> Token {
206-
return Token.verbatim(Verbatim(text: text))
207-
}
208204
}

Sources/SwiftFormatPrettyPrint/TokenStreamCreator.swift

Lines changed: 74 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
132132
appendBeforeTokens(firstToken)
133133
}
134134

135-
appendToken(.verbatim(Verbatim(text: node.description)))
135+
appendToken(.verbatim(Verbatim(text: node.description, indentingBehavior: .allLines)))
136136

137137
if let lastToken = node.lastToken {
138138
// Extract any comments that trail the verbatim block since they belong to the next syntax
@@ -1978,6 +1978,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
19781978
appendToken(.syntax(token.text))
19791979
}
19801980

1981+
appendTrailingTrivia(token)
19811982
appendAfterTokensAndTrailingComments(token)
19821983

19831984
// It doesn't matter what we return here, tokens do not have children.
@@ -2033,6 +2034,43 @@ private final class TokenStreamCreator: SyntaxVisitor {
20332034
}
20342035
}
20352036

2037+
/// Handle trailing trivia that might contain garbage text that we don't want to indiscriminantly
2038+
/// discard.
2039+
///
2040+
/// In syntactically valid code, trailing trivia will only contain spaces or tabs, so we can
2041+
/// usually ignore it entirely. If there is garbage text after a token, however, then we preserve
2042+
/// it (and any whitespace immediately before it) and "glue" it to the end of the preceding token
2043+
/// using a `verbatim` formatting token. Any whitespace following the last garbage text in the
2044+
/// trailing trivia will be discarded, with the assumption that the formatter will have inserted
2045+
/// some kind of break there that would be more appropriate (and we want to avoid inserting
2046+
/// trailing whitespace on a line).
2047+
///
2048+
/// The choices above are admittedly somewhat arbitrary, but given that garbage text in trailing
2049+
/// trivia represents a malformed input (as opposed to garbage text in leading trivia, which has
2050+
/// some legitimate uses), this is a reasonable compromise to keep the garbage text roughly in the
2051+
/// same place but still let surrounding formatting occur somewhat as expected.
2052+
private func appendTrailingTrivia(_ token: TokenSyntax) {
2053+
let trailingTrivia = Array(token.trailingTrivia)
2054+
guard let lastGarbageIndex = trailingTrivia.lastIndex(where: { $0.isGarbageText }) else {
2055+
return
2056+
}
2057+
2058+
var verbatimText = ""
2059+
for piece in trailingTrivia[...lastGarbageIndex] {
2060+
switch piece {
2061+
case .garbageText, .spaces, .tabs, .formfeeds, .verticalTabs:
2062+
piece.write(to: &verbatimText)
2063+
default:
2064+
// The implementation of the lexer today ensures that newlines, carriage returns, and
2065+
// comments will not be present in trailing trivia. Ignore them for now (rather than assert,
2066+
// in case that changes in a future version).
2067+
break
2068+
}
2069+
}
2070+
2071+
appendToken(.verbatim(Verbatim(text: verbatimText, indentingBehavior: .none)))
2072+
}
2073+
20362074
/// Appends the after-tokens and trailing comments (if present) of the given syntax token
20372075
/// to the token stream.
20382076
///
@@ -2397,7 +2435,11 @@ private final class TokenStreamCreator: SyntaxVisitor {
23972435
}
23982436
}
23992437

2400-
var lastPieceWasLineComment = false
2438+
// Updated throughout the loop to indicate whether the next newline *must* be honored (for
2439+
// example, even if discretionary newlines are discarded). This is the case when the preceding
2440+
// trivia was a line comment or garbage text.
2441+
var requiresNextNewline = false
2442+
24012443
for (index, piece) in trivia.enumerated() {
24022444
if let cutoff = cutoffIndex, index == cutoff { break }
24032445
switch piece {
@@ -2407,7 +2449,7 @@ private final class TokenStreamCreator: SyntaxVisitor {
24072449
appendNewlines(.soft)
24082450
isStartOfFile = false
24092451
}
2410-
lastPieceWasLineComment = true
2452+
requiresNextNewline = true
24112453

24122454
case .blockComment(let text):
24132455
if index > 0 || isStartOfFile {
@@ -2418,39 +2460,51 @@ private final class TokenStreamCreator: SyntaxVisitor {
24182460
appendToken(.break(.same, size: 0))
24192461
isStartOfFile = false
24202462
}
2421-
lastPieceWasLineComment = false
2463+
requiresNextNewline = false
24222464

24232465
case .docLineComment(let text):
24242466
appendToken(.comment(Comment(kind: .docLine, text: text), wasEndOfLine: false))
24252467
appendNewlines(.soft)
24262468
isStartOfFile = false
2427-
lastPieceWasLineComment = true
2469+
requiresNextNewline = true
24282470

24292471
case .docBlockComment(let text):
24302472
appendToken(.comment(Comment(kind: .docBlock, text: text), wasEndOfLine: false))
24312473
appendNewlines(.soft)
24322474
isStartOfFile = false
2433-
lastPieceWasLineComment = false
2475+
requiresNextNewline = false
24342476

24352477
case .newlines(let count), .carriageReturns(let count), .carriageReturnLineFeeds(let count):
24362478
guard !isStartOfFile else { break }
2437-
// Even if we aren't respecting discretionary newlines, there must always be a newline after
2438-
// a line comment.
2439-
if lastPieceWasLineComment ||
2479+
2480+
if requiresNextNewline ||
24402481
(config.respectsExistingLineBreaks && isDiscretionaryNewlineAllowed(before: token))
24412482
{
24422483
appendNewlines(.soft(count: count, discretionary: true))
24432484
} else {
24442485
// Even if discretionary line breaks are not being respected, we still respect multiple
24452486
// line breaks in order to keep blank separator lines that the user might want.
24462487
// TODO: It would be nice to restrict this to only allow multiple lines between statements
2447-
// and declarations; as currently implemented, multiple newlines will locally the
2488+
// and declarations; as currently implemented, multiple newlines will locally ignore the
24482489
// configuration setting.
24492490
if count > 1 {
24502491
appendNewlines(.soft(count: count, discretionary: true))
24512492
}
24522493
}
24532494

2495+
case .garbageText(let text):
2496+
// Garbage text in leading trivia might be something meaningful that would be disruptive to
2497+
// throw away when formatting the file, like a hashbang line or Unicode byte-order marker at
2498+
// the beginning of a file, or source control conflict markers. Keep it as verbatim text so
2499+
// that it is printed exactly as we got it.
2500+
appendToken(.verbatim(Verbatim(text: text, indentingBehavior: .none)))
2501+
2502+
// Unicode byte-order markers shouldn't allow leading newlines to otherwise appear in the
2503+
// file, nor should they modify our detection of the beginning of the file.
2504+
let isBOM = text == "\u{feff}"
2505+
requiresNextNewline = !isBOM
2506+
isStartOfFile = isStartOfFile && isBOM
2507+
24542508
default:
24552509
break
24562510
}
@@ -2956,6 +3010,16 @@ extension Collection {
29563010
}
29573011
}
29583012

3013+
extension TriviaPiece {
3014+
/// True if the trivia piece is garbage text.
3015+
fileprivate var isGarbageText: Bool {
3016+
switch self {
3017+
case .garbageText: return true
3018+
default: return false
3019+
}
3020+
}
3021+
}
3022+
29593023
/// Returns whether the given trivia includes a directive to ignore formatting for the next node.
29603024
///
29613025
/// - Parameter trivia: Leading trivia for a node that the formatter supports ignoring.

Sources/SwiftFormatPrettyPrint/Verbatim.swift

Lines changed: 65 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,31 +26,69 @@ enum IndentingBehavior {
2626
}
2727

2828
struct Verbatim {
29-
let indentingBehavior: IndentingBehavior
30-
var lines: [String] = []
31-
var leadingWhitespaceCounts: [Int] = []
29+
/// The behavior used to adjust indentation when printing verbatim content.
30+
private let indentingBehavior: IndentingBehavior
3231

33-
init(text: String, indentingBehavior: IndentingBehavior = .allLines) {
32+
/// The lines of verbatim text.
33+
private let lines: [String]
34+
35+
/// The number of leading whitespaces to print for each line of verbatim content, not including
36+
/// any additional indentation requested externally.
37+
private let leadingWhitespaceCounts: [Int]
38+
39+
init(text: String, indentingBehavior: IndentingBehavior) {
3440
self.indentingBehavior = indentingBehavior
35-
tokenizeTextAndTrimWhitespace(text: text)
36-
}
3741

38-
mutating func tokenizeTextAndTrimWhitespace(text: String) {
39-
lines = text.split(separator: "\n", omittingEmptySubsequences: false).map { String($0) }
42+
var originalLines = text.split(separator: "\n", omittingEmptySubsequences: false)
4043

4144
// Prevents an extra leading new line from being created.
42-
if lines[0] == "" {
43-
lines.remove(at: 0)
45+
if originalLines[0].isEmpty {
46+
originalLines.remove(at: 0)
47+
}
48+
49+
// If we have no lines left (or none with any content), just initialize everything empty and
50+
// exit.
51+
guard
52+
!originalLines.isEmpty,
53+
let index = originalLines.firstIndex(where: { !$0.isEmpty })
54+
else {
55+
self.lines = []
56+
self.leadingWhitespaceCounts = []
57+
return
4458
}
4559

46-
guard lines.count > 0, let index = lines.firstIndex(where: { $0 != "" }) else { return }
60+
// If our indenting behavior is `none`, then keep the original lines _exactly_ as is---don't
61+
// attempt to calculate or trim their leading indentation.
62+
guard indentingBehavior != .none else {
63+
self.lines = originalLines.map(String.init)
64+
self.leadingWhitespaceCounts = [Int](repeating: 0, count: originalLines.count)
65+
return
66+
}
4767

48-
// Get the number of leading whitespaces of the first line, and subract this from the number of
49-
// leading whitespaces for subsequent lines (if possible). Record the new leading whitespaces
50-
// counts, and trim off whitespace from the ends of the strings.
51-
let count = countLeadingWhitespaces(text: lines[index])
52-
leadingWhitespaceCounts = lines.map { max(countLeadingWhitespaces(text: $0) - count, 0) }
53-
lines = lines.map { $0.trimmingCharacters(in: CharacterSet(charactersIn: " ")) }
68+
// Otherwise, we're in one of the indentation compensating modes. Get the number of leading
69+
// whitespaces of the first line, and subtract this from the number of leading whitespaces for
70+
// subsequent lines (if possible). Record the new leading whitespaces counts, and trim off
71+
// whitespace from the ends of the strings.
72+
let firstLineLeadingSpaceCount = numberOfLeadingSpaces(in: originalLines[index])
73+
self.leadingWhitespaceCounts = originalLines.map {
74+
max(numberOfLeadingSpaces(in: $0) - firstLineLeadingSpaceCount, 0)
75+
}
76+
self.lines = originalLines.map { $0.trimmingCharacters(in: CharacterSet(charactersIn: " ")) }
77+
}
78+
79+
/// Returns the length that the pretty printer should use when determining layout for this
80+
/// verbatim content.
81+
///
82+
/// Specifically, multiline content should have a length equal to the maximum (to force breaking),
83+
/// while single-line content should have its natural length.
84+
func prettyPrintingLength(maximum: Int) -> Int {
85+
if lines.isEmpty {
86+
return 0
87+
}
88+
if lines.count > 1 {
89+
return maximum
90+
}
91+
return lines[0].count
5492
}
5593

5694
func print(indent: [Indent]) -> String {
@@ -60,11 +98,12 @@ struct Verbatim {
6098
switch indentingBehavior {
6199
case .firstLine where i == 0, .allLines:
62100
output += indent.indentation()
63-
break
64101
case .none, .firstLine:
65102
break
66103
}
67-
output += String(repeating: " ", count: leadingWhitespaceCounts[i])
104+
if leadingWhitespaceCounts[i] > 0 {
105+
output += String(repeating: " ", count: leadingWhitespaceCounts[i])
106+
}
68107
output += lines[i]
69108
}
70109
if i < lines.count - 1 {
@@ -73,12 +112,13 @@ struct Verbatim {
73112
}
74113
return output
75114
}
115+
}
76116

77-
func countLeadingWhitespaces(text: String) -> Int {
78-
var count = 0
79-
for char in text {
80-
if char == " " { count += 1 } else { break }
81-
}
82-
return count
117+
/// Returns the leading number of spaces in the given string.
118+
fileprivate func numberOfLeadingSpaces(in text: Substring) -> Int {
119+
var count = 0
120+
for char in text {
121+
if char == " " { count += 1 } else { break }
83122
}
123+
return count
84124
}

0 commit comments

Comments
 (0)