Use ranges to represent tokens and improve overlap checks

fwcd · fwcd · commit 8f1001a61aec · 2021-07-20T15:09:59.000+02:00
- Simplify overlap checks
- Add makeToken helper in SemanticTokensTests
- Represent SyntaxHighlightingTokens using ranges internally
- Fix token lengths for function signatures with special characters,
  e.g. emojis
- Fix handling of the empty-range case in updateLexicalAndSyntacticTokens
diff --git a/Sources/SKTestSupport/Array+SyntaxHighlightingToken.swift b/Sources/SKTestSupport/Array+SyntaxHighlightingToken.swift
@@ -42,7 +42,7 @@ extension Array where Element == SyntaxHighlightingToken {
 
       append(SyntaxHighlightingToken(
         start: current,
-        length: length,
+        utf16length: length,
         kind: kind,
         modifiers: modifiers
       ))
diff --git a/Sources/SourceKitLSP/DocumentManager.swift b/Sources/SourceKitLSP/DocumentManager.swift
@@ -177,10 +177,8 @@ public final class DocumentManager {
           document.latestTokens.withMutableTokensOfEachKind { tokens in
             tokens = Array(tokens.lazy
               .filter {
-                // Only keep tokens that don't overlap or bound with the edit range
-                $0.start >= range.upperBound
-                || range.lowerBound >= $0.sameLineEnd
-                || range.isEmpty
+                // Only keep tokens that don't overlap with the edit range
+                !$0.range.overlaps(range)
               }
               .map {
                 // Shift tokens after the edit range
@@ -263,13 +261,10 @@ public final class DocumentManager {
         throw Error.missingDocument(uri)
       }
 
-      // Remove all tokens in `range` (or the entire document if `range` is `nil`)
+      // Remove all tokens that overlap with `range`
+      // (or the entire document if `range` is `nil`)
       document.latestTokens.lexical.removeAll { token in
-        range.map {
-          token.start <= $0.upperBound
-          && $0.lowerBound <= token.sameLineEnd
-          && !$0.isEmpty
-        } ?? true
+        range.map { token.range.overlaps($0) } ?? true
       }
 
       document.latestTokens.lexical += newTokens
diff --git a/Sources/SourceKitLSP/Swift/SwiftLanguageServer.swift b/Sources/SourceKitLSP/Swift/SwiftLanguageServer.swift
@@ -169,21 +169,27 @@ public final class SwiftLanguageServer: ToolchainLanguageServer {
   ) {
     dispatchPrecondition(condition: .onQueue(queue))
 
+    guard let offset: Int = response[keys.offset],
+          let length: Int = response[keys.length],
+          let start: Position = snapshot.positionOf(utf8Offset: offset),
+          let end: Position = snapshot.positionOf(utf8Offset: offset + length) else {
+      log("updateLexicalAndSyntacticTokens failed, no range found", level: .error)
+      return
+    }
+
     let uri = snapshot.document.uri
+    let range = start..<end
+
+    // If the range is empty we don't have to (and shouldn't) update anything.
+    // This is important, since the substructure may be empty, causing us to
+    // unnecessarily remove all syntactic tokens.
+    guard !range.isEmpty else {
+      return
+    }
 
     if let syntaxMap: SKDResponseArray = response[keys.syntaxmap] {
       let tokenParser = SyntaxHighlightingTokenParser(sourcekitd: sourcekitd)
       let tokens = tokenParser.parseTokens(syntaxMap, in: snapshot)
-      let range: Range<Position>?
-
-      if let offset: Int = response[keys.offset],
-         let length: Int = response[keys.length],
-         let start: Position = snapshot.positionOf(utf8Offset: offset),
-         let end: Position = snapshot.positionOf(utf8Offset: offset + length) {
-        range = start..<end
-      } else {
-        range = nil
-      }
 
       do {
         try documentManager.replaceLexicalTokens(uri, in: range, with: tokens)
@@ -523,7 +529,7 @@ extension SwiftLanguageServer {
           // empty range for an edit, causing all syntactic tokens to get removed
           // therefore we only update them if the range is non-empty.
 
-          if !(edit.range?.isEmpty ?? false), let dict = lastResponse, let snapshot = self.documentManager.latestSnapshot(uri) {
+          if let dict = lastResponse, let snapshot = self.documentManager.latestSnapshot(uri) {
             self.updateLexicalAndSyntacticTokens(response: dict, for: snapshot)
           }
         }
@@ -843,7 +849,7 @@ extension SwiftLanguageServer {
         return
       }
 
-      let tokens = snapshot.tokens.mergedAndSorted.filter { $0.sameLineRange.overlaps(range) }
+      let tokens = snapshot.tokens.mergedAndSorted.filter { $0.range.overlaps(range) }
       let encodedTokens = tokens.lspEncoded
 
       req.reply(DocumentSemanticTokensResponse(data: encodedTokens))
diff --git a/Sources/SourceKitLSP/Swift/SyntaxHighlightingToken.swift b/Sources/SourceKitLSP/Swift/SyntaxHighlightingToken.swift
@@ -16,56 +16,48 @@ import LSPLogging
 
 /// A ranged token in the document used for syntax highlighting.
 public struct SyntaxHighlightingToken: Hashable {
-  public var start: Position
-  public var length: Int
+  /// The range of the token in the document. Must be on a single line.
+  public var range: Range<Position> {
+    didSet {
+      assert(range.lowerBound.line == range.upperBound.line)
+    }
+  }
+  /// The token type.
   public var kind: Kind
+  /// Additional metadata about the token.
   public var modifiers: Modifiers
 
-  /// The end of a token. Note that this requires the token to be
-  /// on a single line, which is the case for all tokens emitted
-  /// by parseTokens, however.
-  public var sameLineEnd: Position {
-    Position(line: start.line, utf16index: start.utf16index + length)
+  /// The (inclusive) start position of the token.
+  /// Setting it shifts the token and preserves the length.
+  public var start: Position {
+    get { range.lowerBound }
+    set {
+      let length = utf16length
+      range = newValue..<Position(line: newValue.line, utf16index: newValue.utf16index + length)
+    }
   }
-  public var sameLineRange: Range<Position> {
-    start..<sameLineEnd
+  /// The (exclusive) end position of the token.
+  public var end: Position { range.upperBound }
+  /// The length of the token in UTF-16 code units.
+  public var utf16length: Int {
+    get { end.utf16index - start.utf16index }
+    set {
+      assert(newValue >= 0)
+      range = start..<Position(line: start.line, utf16index: start.utf16index + newValue)
+    }
   }
 
-  public init(
-    start: Position,
-    length: Int,
-    kind: Kind,
-    modifiers: Modifiers = []
-  ) {
-    self.start = start
-    self.length = length
+  public init(range: Range<Position>, kind: Kind, modifiers: Modifiers = []) {
+    assert(range.lowerBound.line == range.upperBound.line)
+
+    self.range = range
     self.kind = kind
     self.modifiers = modifiers
   }
 
-  /// Splits a potentially multi-line token to multiple single-line tokens.
-  public func splitToSingleLineTokens(in snapshot: DocumentSnapshot) -> [Self] {
-    guard let startIndex = snapshot.index(of: start) else {
-      fatalError("Token \(self) begins outside of the document")
-    }
-
-    let endIndex = snapshot.text.index(startIndex, offsetBy: length)
-    let text = snapshot.text[startIndex..<endIndex]
-    let lines = text.split(separator: "\n")
-
-    return lines
-      .enumerated()
-      .map { (i, content) in
-        Self(
-          start: Position(
-            line: start.line + i,
-            utf16index: i == 0 ? start.utf16index : 0
-          ),
-          length: content.count,
-          kind: kind,
-          modifiers: modifiers
-        )
-      }
+  public init(start: Position, utf16length: Int, kind: Kind, modifiers: Modifiers = []) {
+    let range = start..<Position(line: start.line, utf16index: start.utf16index + utf16length)
+    self.init(range: range, kind: kind, modifiers: modifiers)
   }
 
   /// The token type. Represented using an int to make the conversion to
@@ -223,7 +215,7 @@ extension Array where Element == SyntaxHighlightingToken {
       rawTokens += [
         UInt32(lineDelta),
         UInt32(charDelta),
-        UInt32(token.length),
+        UInt32(token.utf16length),
         token.kind.rawValue,
         token.modifiers.rawValue
       ]
@@ -236,8 +228,35 @@ extension Array where Element == SyntaxHighlightingToken {
   /// preferring the given array's tokens if duplicate ranges are
   /// found.
   public func mergingTokens(with other: [SyntaxHighlightingToken]) -> [SyntaxHighlightingToken] {
-    let otherRanges = Set(other.map(\.sameLineRange))
-    return filter { !otherRanges.contains($0.sameLineRange) } + other
+    let otherRanges = Set(other.map(\.range))
+    return filter { !otherRanges.contains($0.range) } + other
+  }
+}
+
+extension Range where Bound == Position {
+  /// Splits a potentially multi-line range to multiple single-line ranges.
+  fileprivate func splitToSingleLineRanges(in snapshot: DocumentSnapshot) -> [Self] {
+    guard let startIndex = snapshot.index(of: lowerBound),
+          let endIndex = snapshot.index(of: upperBound) else {
+      fatalError("Range \(self) reaches outside of the document")
+    }
+
+    let text = snapshot.text[startIndex..<endIndex]
+    let lines = text.split(separator: "\n")
+
+    return lines
+      .enumerated()
+      .map { (i, content) in
+        let start = Position(
+          line: lowerBound.line + i,
+          utf16index: i == 0 ? lowerBound.utf16index : 0
+        )
+        let end = Position(
+          line: start.line,
+          utf16index: start.utf16index + content.utf16.count
+        )
+        return start..<end
+      }
   }
 }
 
@@ -271,7 +290,7 @@ struct SyntaxHighlightingTokenParser {
       if useName && [.function, .method, .enumMember].contains(kind) && modifiers.contains(.declaration),
          let name: String = response[keys.name],
          name.contains("("),
-         let funcNameLength: Int = name.split(separator: "(").first?.utf16.count {
+         let funcNameLength: Int = name.split(separator: "(").first?.utf8.count {
         length = funcNameLength
       }
 
@@ -282,14 +301,18 @@ struct SyntaxHighlightingTokenParser {
         length += 2
       }
 
-      let multiLineToken = SyntaxHighlightingToken(
-        start: start,
-        length: length,
-        kind: kind,
-        modifiers: modifiers
-      )
-
-      tokens += multiLineToken.splitToSingleLineTokens(in: snapshot)
+      if let end: Position = snapshot.positionOf(utf8Offset: offset + length) {
+        let multiLineRange = start..<end
+        let ranges = multiLineRange.splitToSingleLineRanges(in: snapshot)
+
+        tokens += ranges.map {
+          SyntaxHighlightingToken(
+            range: $0,
+            kind: kind,
+            modifiers: modifiers
+          )
+        }
+      }
     }
 
     if let substructure: SKDResponseArray = response[keys.substructure] {
diff --git a/Tests/SourceKitLSPTests/SemanticTokensTests.swift b/Tests/SourceKitLSPTests/SemanticTokensTests.swift