Update swift-format to account for new multiline string tree structure.

allevato · allevato · commit 02a366d843af · 2023-01-27T14:06:20.000-08:00
This is a companion to swiftlang/swift-syntax#1255. The new structure of multiline strings yielded some nice cleanup of the way we handle those strings *directly*, but to keep the existing indentation decisions, some parts of multiline string processing bled out into other areas. Such is life.
diff --git a/Sources/SwiftFormatCore/LegacyTriviaBehavior.swift b/Sources/SwiftFormatCore/LegacyTriviaBehavior.swift
@@ -36,7 +36,7 @@ private final class LegacyTriviaBehaviorRewriter: SyntaxRewriter {
 /// behavior.
 private func shouldTriviaPieceBeMoved(_ piece: TriviaPiece) -> Bool {
   switch piece {
-  case .spaces, .tabs, .unexpectedText:
+  case .spaces, .tabs, .unexpectedText, .backslashes:
     return false
   default:
     return true
diff --git a/Sources/SwiftFormatCore/Trivia+Convenience.swift b/Sources/SwiftFormatCore/Trivia+Convenience.swift
@@ -153,4 +153,14 @@ extension Trivia {
         return false
       })
   }
+
+  /// Returns `true` if this trivia contains any backslashes (used for multiline string newline
+  /// suppression).
+  public var containsBackslashes: Bool {
+    return contains(
+      where: {
+        if case .backslashes = $0 { return true }
+        return false
+      })
+  }
 }
diff --git a/Sources/SwiftFormatPrettyPrint/TokenStreamCreator.swift b/Sources/SwiftFormatPrettyPrint/TokenStreamCreator.swift
@@ -33,10 +33,10 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
   /// appended since that break.
   private var canMergeNewlinesIntoLastBreak = false
 
-  /// Keeps track of the prefix length of multiline string segments when they are visited so that
-  /// the prefix can be stripped at the beginning of lines before the text is added to the token
-  /// stream.
-  private var pendingMultilineStringSegmentPrefixLengths = [TokenSyntax: Int]()
+  /// Keeps track of the kind of break that should be used inside a multiline string. This differs
+  /// depending on surrounding context due to some tricky special cases, so this lets us pass that
+  /// information down to the strings that need it.
+  private var pendingMultilineStringBreakKinds = [StringLiteralExprSyntax: BreakKind]()
 
   /// Lists tokens that shouldn't be appended to the token stream as `syntax` tokens. They will be
   /// printed conditionally using a different type of token.
@@ -659,7 +659,14 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
   }
 
   override func visit(_ node: ReturnStmtSyntax) -> SyntaxVisitorContinueKind {
-    before(node.expression?.firstToken, tokens: .break)
+    if let expression = node.expression {
+      if leftmostMultilineStringLiteral(of: expression) != nil {
+        before(expression.firstToken, tokens: .break(.open))
+        after(expression.lastToken, tokens: .break(.close(mustBreak: false)))
+      } else {
+        before(expression.firstToken, tokens: .break)
+      }
+    }
     return .visitChildren
   }
 
@@ -1035,21 +1042,32 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
       before(node.firstToken, tokens: .open)
     }
 
-    // If we have an open delimiter following the colon, use a space instead of a continuation
-    // break so that we don't awkwardly shift the delimiter down and indent it further if it
-    // wraps.
-    let tokenAfterColon: Token = startsWithOpenDelimiter(Syntax(node.expression)) ? .space : .break
+    var additionalEndTokens = [Token]()
+    if let colon = node.colon {
+      // If we have an open delimiter following the colon, use a space instead of a continuation
+      // break so that we don't awkwardly shift the delimiter down and indent it further if it
+      // wraps.
+      var tokensAfterColon: [Token] = [
+        startsWithOpenDelimiter(Syntax(node.expression)) ? .space : .break
+      ]
 
-    after(node.colon, tokens: tokenAfterColon)
+      if leftmostMultilineStringLiteral(of: node.expression) != nil {
+        tokensAfterColon.append(.break(.open(kind: .block), size: 0))
+        additionalEndTokens = [.break(.close(mustBreak: false), size: 0)]
+      }
+
+      after(colon, tokens: tokensAfterColon)
+    }
 
     if let trailingComma = node.trailingComma {
+      before(trailingComma, tokens: additionalEndTokens)
       var afterTrailingComma: [Token] = [.break(.same)]
       if shouldGroup {
         afterTrailingComma.insert(.close, at: 0)
       }
       after(trailingComma, tokens: afterTrailingComma)
     } else if shouldGroup {
-      after(node.lastToken, tokens: .close)
+      after(node.lastToken, tokens: additionalEndTokens + [.close])
     }
   }
 
@@ -1774,8 +1792,9 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
 
         // If the rhs starts with a parenthesized expression, stack indentation around it.
         // Otherwise, use regular continuation breaks.
-        if let (unindentingNode, _) = stackedIndentationBehavior(after: binOp, rhs: rhs) {
-          beforeTokens = [.break(.open(kind: .continuation))]
+        if let (unindentingNode, _, breakKind) = stackedIndentationBehavior(after: binOp, rhs: rhs)
+        {
+          beforeTokens = [.break(.open(kind: breakKind))]
           after(unindentingNode.lastToken, tokens: [.break(.close(mustBreak: false), size: 0)])
         } else {
           beforeTokens = [.break(.continue)]
@@ -1790,7 +1809,7 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
         }
 
         after(binOp.lastToken, tokens: beforeTokens)
-      } else if let (unindentingNode, shouldReset) =
+      } else if let (unindentingNode, shouldReset, breakKind) =
         stackedIndentationBehavior(after: binOp, rhs: rhs)
       {
         // For parenthesized expressions and for unparenthesized usages of `&&` and `||`, we don't
@@ -1800,7 +1819,7 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
         // use open-continuation/close pairs around such operators and their right-hand sides so
         // that the continuation breaks inside those scopes "stack", instead of receiving the
         // usual single-level "continuation line or not" behavior.
-        let openBreakTokens: [Token] = [.break(.open(kind: .continuation)), .open]
+        let openBreakTokens: [Token] = [.break(.open(kind: breakKind)), .open]
         if wrapsBeforeOperator {
           before(binOp.firstToken, tokens: openBreakTokens)
         } else {
@@ -1921,8 +1940,8 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
     if let initializer = node.initializer {
       let expr = initializer.value
 
-      if let (unindentingNode, _) = stackedIndentationBehavior(rhs: expr) {
-        after(initializer.equal, tokens: .break(.open(kind: .continuation)))
+      if let (unindentingNode, _, breakKind) = stackedIndentationBehavior(rhs: expr) {
+        after(initializer.equal, tokens: .break(.open(kind: breakKind)))
         after(unindentingNode.lastToken, tokens: .break(.close(mustBreak: false), size: 0))
       } else {
         after(initializer.equal, tokens: .break(.continue))
@@ -2100,32 +2119,48 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
 
   override func visit(_ node: StringLiteralExprSyntax) -> SyntaxVisitorContinueKind {
     if node.openQuote.tokenKind == .multilineStringQuote {
-      // If it's a multiline string, the last segment of the literal will end with a newline and
-      // zero or more whitespace that indicates the amount of whitespace stripped from each line of
-      // the string literal.
-      if let lastSegment = node.segments.last?.as(StringSegmentSyntax.self),
-        let lastLine
-          = lastSegment.content.text.split(separator: "\n", omittingEmptySubsequences: false).last
-      {
-        let prefixCount = lastLine.count
-
-        // Segments may be `StringSegmentSyntax` or `ExpressionSegmentSyntax`; for the purposes of
-        // newline handling and whitespace stripping, we only need to handle the former.
-        for segmentSyntax in node.segments {
-          guard let segment = segmentSyntax.as(StringSegmentSyntax.self) else {
-            continue
-          }
-          // Register the content tokens of the segments and the amount of leading whitespace to
-          // strip; this will be retrieved when we visit the token.
-          pendingMultilineStringSegmentPrefixLengths[segment.content] = prefixCount
-        }
-      }
+      // Looks up the correct break kind based on prior context.
+      let breakKind = pendingMultilineStringBreakKinds[node, default: .same]
+      after(node.openQuote, tokens: .break(breakKind, size: 0, newlines: .hard(count: 1)))
+      before(node.closeQuote, tokens: .break(breakKind, newlines: .hard(count: 1)))
     }
     return .visitChildren
   }
 
   override func visit(_ node: StringSegmentSyntax) -> SyntaxVisitorContinueKind {
-    return .visitChildren
+    // Looks up the correct break kind based on prior context.
+    func breakKind() -> BreakKind {
+      if let stringLiteralSegments = node.parent?.as(StringLiteralSegmentsSyntax.self),
+        let stringLiteralExpr = stringLiteralSegments.parent?.as(StringLiteralExprSyntax.self)
+      {
+        return pendingMultilineStringBreakKinds[stringLiteralExpr, default: .same]
+      } else {
+        return .same
+      }
+    }
+
+    let segmentText = node.content.text
+    if segmentText.hasSuffix("\n") {
+      // If this is a multiline string segment, it will end in a newline. Remove the newline and
+      // append the rest of the string, followed by a break if it's not the last line before the
+      // closing quotes. (The `StringLiteralExpr` above does the closing break.)
+      let remainder = node.content.text.dropLast()
+      if !remainder.isEmpty {
+        appendToken(.syntax(String(remainder)))
+      }
+      appendToken(.break(breakKind(), newlines: .hard(count: 1)))
+    } else {
+      appendToken(.syntax(segmentText))
+    }
+
+    if node.trailingTrivia?.containsBackslashes == true {
+      // Segments with trailing backslashes won't end with a literal newline; the backslash is
+      // considered trivia. To preserve the original text and wrapping, we need to manually render
+      // the backslash and a break into the token stream.
+      appendToken(.syntax("\\"))
+      appendToken(.break(breakKind(), newlines: .hard(count: 1)))
+    }
+    return .skipChildren
   }
 
   override func visit(_ node: AssociatedtypeDeclSyntax) -> SyntaxVisitorContinueKind {
@@ -2343,9 +2378,7 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
     extractLeadingTrivia(token)
     closeScopeTokens.forEach(appendToken)
 
-    if let pendingSegmentIndex = pendingMultilineStringSegmentPrefixLengths.index(forKey: token) {
-      appendMultilineStringSegments(at: pendingSegmentIndex)
-    } else if !ignoredTokens.contains(token) {
+    if !ignoredTokens.contains(token) {
       // Otherwise, it's just a regular token, so add the text as-is.
       appendToken(.syntax(token.presence == .present ? token.text : ""))
     }
@@ -2357,48 +2390,6 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
     return .skipChildren
   }
 
-  /// Appends the contents of the pending multiline string segment at the given index in the
-  /// registration dictionary (removing it from that dictionary) to the token stream, splitting it
-  /// into lines along with required line breaks and stripping the leading whitespace.
-  private func appendMultilineStringSegments(at index: Dictionary<TokenSyntax, Int>.Index) {
-    let (token, prefixCount) = pendingMultilineStringSegmentPrefixLengths[index]
-    pendingMultilineStringSegmentPrefixLengths.remove(at: index)
-
-    let lines = token.text.split(separator: "\n", omittingEmptySubsequences: false)
-
-    // The first "line" is a special case. If it is non-empty, then it is a piece of text that
-    // immediately followed an interpolation segment on the same line of the string, like the
-    // " baz" in "foo bar \(x + y) baz". If that is the case, we need to insert that text before
-    // anything else.
-    let firstLine = lines.first!
-    if !firstLine.isEmpty {
-      appendToken(.syntax(String(firstLine)))
-    }
-
-    // Add the remaining lines of the segment, preceding each with a newline and stripping the
-    // leading whitespace so that the pretty-printer can re-indent the string according to the
-    // standard rules that it would apply.
-    for line in lines.dropFirst() as ArraySlice {
-      appendNewlines(.hard)
-
-      // Verify that the characters to be stripped are all spaces. If they are not, the string
-      // is not valid (no line should contain less leading whitespace than the line with the
-      // closing quotes), but the parser still allows this and it's flagged as an error later during
-      // compilation, so we don't want to destroy the user's text in that case.
-      let stringToAppend: Substring
-      if (line.prefix(prefixCount).allSatisfy { $0 == " " }) {
-        stringToAppend = line.dropFirst(prefixCount)
-      } else {
-        // Only strip as many spaces as we have. This will force the misaligned line to line up with
-        // the others; let's assume that's what the user wanted anyway.
-        stringToAppend = line.drop { $0 == " " }
-      }
-      if !stringToAppend.isEmpty {
-        appendToken(.syntax(String(stringToAppend)))
-      }
-    }
-  }
-
   /// Appends the before-tokens of the given syntax token to the token stream.
   private func appendBeforeTokens(_ token: TokenSyntax) {
     if let before = beforeMap.removeValue(forKey: token) {
@@ -3179,6 +3170,26 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
     }
   }
 
+  /// Walks the expression and returns the leftmost multiline string literal (which might be the
+  /// expression itself) if the leftmost child is a multiline string literal.
+  ///
+  /// - Parameter expr: The expression whose leftmost multiline string literal should be returned.
+  /// - Returns: The leftmost multiline string literal, or nil if the leftmost subexpression was
+  ///   not a multiline string literal.
+  private func leftmostMultilineStringLiteral(of expr: ExprSyntax) -> StringLiteralExprSyntax? {
+    switch Syntax(expr).as(SyntaxEnum.self) {
+    case .stringLiteralExpr(let stringLiteralExpr)
+      where stringLiteralExpr.openQuote.tokenKind == .multilineStringQuote:
+      return stringLiteralExpr
+    case .infixOperatorExpr(let infixOperatorExpr):
+      return leftmostMultilineStringLiteral(of: infixOperatorExpr.leftOperand)
+    case .ternaryExpr(let ternaryExpr):
+      return leftmostMultilineStringLiteral(of: ternaryExpr.conditionExpression)
+    default:
+      return nil
+    }
+  }
+
   /// Returns the outermost node enclosing the given node whose closing delimiter(s) must be kept
   /// alongside the last token of the given node. Any tokens between `node.lastToken` and the
   /// returned node's `lastToken` are delimiter tokens that shouldn't be preceded by a break.
@@ -3208,7 +3219,7 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
   private func stackedIndentationBehavior(
     after operatorExpr: ExprSyntax? = nil,
     rhs: ExprSyntax
-  ) -> (unindentingNode: Syntax, shouldReset: Bool)? {
+  ) -> (unindentingNode: Syntax, shouldReset: Bool, breakKind: OpenBreakKind)? {
     // Check for logical operators first, and if it's that kind of operator, stack indentation
     // around the entire right-hand-side. We have to do this check before checking the RHS for
     // parentheses because if the user writes something like `... && (foo) > bar || ...`, we don't
@@ -3227,9 +3238,10 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
         // paren into the right hand side by unindenting after the final closing paren. This glues
         // the paren to the last token of `rhs`.
         if let unindentingParenExpr = outermostEnclosingNode(from: Syntax(rhs)) {
-          return (unindentingNode: unindentingParenExpr, shouldReset: true)
+          return (
+            unindentingNode: unindentingParenExpr, shouldReset: true, breakKind: .continuation)
         }
-        return (unindentingNode: Syntax(rhs), shouldReset: true)
+        return (unindentingNode: Syntax(rhs), shouldReset: true, breakKind: .continuation)
       }
     }
 
@@ -3238,7 +3250,9 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
     if let ternaryExpr = rhs.as(TernaryExprSyntax.self) {
       // We don't try to absorb any parens in this case, because the condition of a ternary cannot
       // be grouped with any exprs outside of the condition.
-      return (unindentingNode: Syntax(ternaryExpr.conditionExpression), shouldReset: false)
+      return (
+        unindentingNode: Syntax(ternaryExpr.conditionExpression), shouldReset: false,
+        breakKind: .continuation)
     }
 
     // If the right-hand-side of the operator is or starts with a parenthesized expression, stack
@@ -3249,9 +3263,26 @@ fileprivate final class TokenStreamCreator: SyntaxVisitor {
       // paren into the right hand side by unindenting after the final closing paren. This glues the
       // paren to the last token of `rhs`.
       if let unindentingParenExpr = outermostEnclosingNode(from: Syntax(rhs)) {
-        return (unindentingNode: unindentingParenExpr, shouldReset: true)
+        return (unindentingNode: unindentingParenExpr, shouldReset: true, breakKind: .continuation)
+      }
+
+      if let innerExpr = parenthesizedExpr.elementList.first?.expression,
+        let stringLiteralExpr = innerExpr.as(StringLiteralExprSyntax.self),
+        stringLiteralExpr.openQuote.tokenKind == .multilineStringQuote
+      {
+        pendingMultilineStringBreakKinds[stringLiteralExpr] = .continue
+        return nil
       }
-      return (unindentingNode: Syntax(parenthesizedExpr), shouldReset: false)
+
+      return (
+        unindentingNode: Syntax(parenthesizedExpr), shouldReset: false, breakKind: .continuation)
+    }
+
+    // If the expression is a multiline string that is unparenthesized, create a block-based
+    // indentation scope and have the segments aligned inside it.
+    if let stringLiteralExpr = leftmostMultilineStringLiteral(of: rhs) {
+      pendingMultilineStringBreakKinds[stringLiteralExpr] = .same
+      return (unindentingNode: Syntax(stringLiteralExpr), shouldReset: false, breakKind: .block)
     }
 
     // Otherwise, don't stack--use regular continuation breaks instead.
diff --git a/Tests/SwiftFormatPrettyPrintTests/StringTests.swift b/Tests/SwiftFormatPrettyPrintTests/StringTests.swift