swiftlang · rintaro · Mar 8, 2021 · Feb 26, 2021 · Mar 3, 2021
diff --git a/include/swift-c/SyntaxParser/SwiftSyntaxParser.h b/include/swift-c/SyntaxParser/SwiftSyntaxParser.h
@@ -103,6 +103,8 @@ typedef struct {
   uint16_t leading_trivia_count;
   uint16_t trailing_trivia_count;
   swiftparse_token_kind_t kind;
+  /// Represents the range for the token, including its associated trivia.
+  swiftparse_range_t range;
 } swiftparse_token_data_t;
 
 typedef struct {
@@ -115,9 +117,6 @@ typedef struct {
     swiftparse_token_data_t token_data;
     swiftparse_layout_data_t layout_data;
   };
-  /// Represents the range for the node. For a token node the range includes
-  /// the trivia associated with it.
-  swiftparse_range_t range;
   /// The syntax kind. A value of '0' means this is a token node.
   swiftparse_syntax_kind_t kind;
   bool present;

diff --git a/tools/libSwiftSyntaxParser/libSwiftSyntaxParser.cpp b/tools/libSwiftSyntaxParser/libSwiftSyntaxParser.cpp
@@ -150,7 +150,7 @@ class CLibParseActions : public SyntaxParseActions {
     node.token_data.trailing_trivia_count = trailingTrivia.size();
     assert(node.token_data.trailing_trivia_count == trailingTrivia.size() &&
            "trailing trivia count value is too large");
-    makeCRange(node.range, range);
+    makeCRange(node.token_data.range, range);
     node.present = true;
   }
 
@@ -186,7 +186,6 @@ class CLibParseActions : public SyntaxParseActions {
     node.layout_data.nodes =
         const_cast<const swiftparse_client_node_t *>(elements.data());
     node.layout_data.nodes_count = elements.size();
-    makeCRange(node.range, range);
     node.present = true;
     return getNodeHandler()(&node);
   }

diff --git a/tools/swift-syntax-parser-test/swift-syntax-parser-test.cpp b/tools/swift-syntax-parser-test/swift-syntax-parser-test.cpp
@@ -58,7 +58,6 @@ NumParses("n", cl::desc("number of invocations"), cl::init(1));
 namespace {
 struct SPNode {
   swiftparse_syntax_kind_t kind;
-  StringRef nodeText;
 
   Optional<swiftparse_token_kind_t> tokKind;
   StringRef leadingTriviaText;
@@ -111,21 +110,20 @@ static swiftparse_client_node_t
 makeNode(const swiftparse_syntax_node_t *raw_node, StringRef source) {
   SPNode *node = new SPNode();
   node->kind = raw_node->kind;
-  auto range = raw_node->range;
-  node->nodeText = source.substr(range.offset, range.length);
   if (raw_node->kind == 0) {
+    auto range = raw_node->token_data.range;
+    auto nodeText = source.substr(range.offset, range.length);
     node->tokKind = raw_node->token_data.kind;
     size_t leadingTriviaLen =
       trivialLen(makeArrayRef(raw_node->token_data.leading_trivia,
                               raw_node->token_data.leading_trivia_count));
     size_t trailingTriviaLen =
       trivialLen(makeArrayRef(raw_node->token_data.trailing_trivia,
                               raw_node->token_data.trailing_trivia_count));
-    node->leadingTriviaText = node->nodeText.take_front(leadingTriviaLen);
-    node->tokenText =
-      node->nodeText.substr(leadingTriviaLen,
-                            range.length-leadingTriviaLen-trailingTriviaLen);
-    node->trailingTriviaText = node->nodeText.take_back(trailingTriviaLen);
+    node->leadingTriviaText = nodeText.take_front(leadingTriviaLen);
+    node->tokenText = nodeText.substr(
+        leadingTriviaLen, range.length - leadingTriviaLen - trailingTriviaLen);
+    node->trailingTriviaText = nodeText.take_back(trailingTriviaLen);
   } else {
     for (unsigned i = 0, e = raw_node->layout_data.nodes_count; i != e; ++i) {
       auto subnode = convertClientNode(raw_node->layout_data.nodes[i]);

diff --git a/unittests/SyntaxParser/CMakeLists.txt b/unittests/SyntaxParser/CMakeLists.txt
@@ -10,7 +10,8 @@ endif()
 
 target_link_libraries(SwiftSyntaxParserTests
   PRIVATE
-  libSwiftSyntaxParser)
+  libSwiftSyntaxParser
+  swiftSyntax)
 
 if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
   set_target_properties(SwiftSyntaxParserTests PROPERTIES

diff --git a/unittests/SyntaxParser/SyntaxParserTests.cpp b/unittests/SyntaxParser/SyntaxParserTests.cpp
@@ -13,10 +13,13 @@
 #include "swift-c/SyntaxParser/SwiftSyntaxParser.h"
 #include "swift/Basic/LLVM.h"
 #include "llvm/ADT/StringRef.h"
+#include "swift/Syntax/Serialization/SyntaxSerialization.h"
 #include <vector>
 #include "gtest/gtest.h"
 
 using namespace swift;
+using namespace swift::syntax;
+using namespace serialization;
 
 static swiftparse_client_node_t
 parse(StringRef source, swiftparse_node_handler_t node_handler,
@@ -29,63 +32,110 @@ parse(StringRef source, swiftparse_node_handler_t node_handler,
   return top;
 }
 
+static bool containsChild(swiftparse_layout_data_t layout_data, void *child) {
+  for (size_t i = 0; i < layout_data.nodes_count; i++) {
+    if (layout_data.nodes[i] == child) {
+      return true;
+    }
+  }
+  return false;
+}
+
 TEST(SwiftSyntaxParserTests, IncrementalParsing) {
   StringRef source1 =
   "func t1() { }\n"
-  "func t2() { }\n";
+  "func t2() { }\n"
+  "func t3() { }\n";
+
   StringRef source2 =
   "func t1renamed() { }\n"
-  "func t2() { }\n";
+  "func t2() { }\n"
+  "func t3() { }\n";
+
+  swiftparse_syntax_kind_t token = getNumericValue(SyntaxKind::Token);
+  swiftparse_syntax_kind_t functionDecl = getNumericValue(SyntaxKind::FunctionDecl);
+  swiftparse_syntax_kind_t codeBlockItem = getNumericValue(SyntaxKind::CodeBlockItem);
+  swiftparse_syntax_kind_t codeBlockItemList = getNumericValue(SyntaxKind::CodeBlockItemList);
+
+  // Set up unique node ids; each pointer stores its own address.
+  void *t1Token = &t1Token;
+  void *t1Func = &t1Func;
+  void *t1CodeBlockItem = &t1CodeBlockItem;
+  void *t2Token = &t2Token;
+  void *t2Func = &t2Func;
+  void *t2CodeBlockItem = &t2CodeBlockItem;
+  void *t3Token = &t3Token;
+  void *t3Func = &t3Func;
+  void *t3CodeBlockItem = &t3CodeBlockItem;
 
-  // FIXME: Use the syntax kind directly instead of the serialization number.
-  swiftparse_syntax_kind_t codeBlockItemList = 163;
-  swiftparse_syntax_kind_t codeBlockItem = 92;
+  // Find the offsets of the t1/t2/t3 identifier tokens in source1.
+  size_t t1TokenOffset = StringRef(source1).find("t1");
+  size_t t2TokenOffset = StringRef(source1).find("t2");
+  size_t t3TokenOffset = StringRef(source1).find("t3");
 
-  // Assign id numbers to codeBlockItem nodes and collect the ids that are
-  // listed as members of a codeBlockItemList node into a vector.
-  // When we reparse, check that we got the parser to resuse the node id from
-  // the previous parse.
+  // Lengths of the t2/t3 code block items: 13 characters plus the leading '\n'.
+  size_t t2CodeBlockItemLength = 14;
+  size_t t3CodeBlockItemLength = 14;
+
+  // Collect the ids of all children of code block item lists so we can verify
+  // that t2 and t3 get reused after the edit from source1 to source2.
+  __block std::vector<void *> codeBlockItemIds;
 
-  __block std::vector<int> nodeids;
-  __block int idcounter = 0;
-  size_t t2Offset = StringRef(source1).find("\nfunc t2");
-  __block int t2NodeId = 0;
-  __block size_t t2NodeLength = 0;
   swiftparse_node_handler_t nodeHandler =
     ^swiftparse_client_node_t(const swiftparse_syntax_node_t *raw_node) {
-      if (raw_node->kind == codeBlockItem) {
-        int nodeid = ++idcounter;
-        if (raw_node->range.offset == t2Offset) {
-          t2NodeId = nodeid;
-          t2NodeLength = raw_node->range.length;
+      if (raw_node->kind == token) {
+        if (raw_node->token_data.range.offset == t1TokenOffset) {
+          return t1Token;
+        } else if (raw_node->token_data.range.offset == t2TokenOffset) {
+          return t2Token;
+        } else if (raw_node->token_data.range.offset == t3TokenOffset) {
+          return t3Token;
         }
-        return (void*)(intptr_t)nodeid;
-      }
-      if (raw_node->kind == codeBlockItemList) {
+      } else if (raw_node->kind == functionDecl) {
+        if (containsChild(raw_node->layout_data, t1Token)) {
+          return t1Func;
+        } else if (containsChild(raw_node->layout_data, t2Token)) {
+          return t2Func;
+        } else if (containsChild(raw_node->layout_data, t3Token)) {
+          return t3Func;
+        }
+      } else if (raw_node->kind == codeBlockItem) {
+        if (containsChild(raw_node->layout_data, t1Func)) {
+          return t1CodeBlockItem;
+        } else if (containsChild(raw_node->layout_data, t2Func)) {
+          return t2CodeBlockItem;
+        } else if (containsChild(raw_node->layout_data, t3Func)) {
+          return t3CodeBlockItem;
+        }
+      } else if (raw_node->kind == codeBlockItemList) {
         for (unsigned i = 0, e = raw_node->layout_data.nodes_count;
              i != e; ++i) {
-          nodeids.push_back((int)(intptr_t)raw_node->layout_data.nodes[i]);
+          codeBlockItemIds.push_back(raw_node->layout_data.nodes[i]);
         }
       }
       return nullptr;
     };
-  parse(source1, nodeHandler, nullptr);
-  EXPECT_EQ(t2NodeId, 2);
-  ASSERT_NE(t2NodeLength, size_t(0));
-  EXPECT_EQ(nodeids, (std::vector<int>{1, 2}));
+  parse(source1, nodeHandler, /*node_lookup=*/nullptr);
+  ASSERT_NE(t2CodeBlockItemLength, size_t(0));
+  EXPECT_EQ(codeBlockItemIds, (std::vector<void *>{t1CodeBlockItem, t2CodeBlockItem, t3CodeBlockItem}));
 
-  nodeids.clear();
-  idcounter = 1000;
-  t2Offset = StringRef(source2).find("\nfunc t2");
+  codeBlockItemIds.clear();
+  size_t t2CodeBlockItemOffset = StringRef(source2).find("\nfunc t2");
+  size_t t3CodeBlockItemOffset = StringRef(source2).find("\nfunc t3");
   swiftparse_node_lookup_t nodeLookup =
     ^swiftparse_lookup_result_t(size_t offset, swiftparse_syntax_kind_t kind) {
-      if (offset == t2Offset && kind == codeBlockItem) {
-        return { t2NodeLength, (void*)(intptr_t)t2NodeId };
-      } else {
-        return {0, nullptr};
+      if (kind == codeBlockItem) {
+        if (offset == t2CodeBlockItemOffset) {
+          return { t2CodeBlockItemLength, t2CodeBlockItem };
+        } else if (offset == t3CodeBlockItemOffset) {
+          return { t3CodeBlockItemLength, t3CodeBlockItem };
+        }
       }
+      return {0, nullptr};
     };
 
   parse(source2, nodeHandler, nodeLookup);
-  EXPECT_EQ(nodeids, (std::vector<int>{1001, 2}));
+  // Assert that t2 and t3 get reused.
+  EXPECT_EQ(codeBlockItemIds[1], t2CodeBlockItem);
+  EXPECT_EQ(codeBlockItemIds[2], t3CodeBlockItem);
 }