Skip to content

[libSyntax] Store range in token_data in C lib parse actions #36249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions include/swift-c/SyntaxParser/SwiftSyntaxParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ typedef struct {
uint16_t leading_trivia_count;
uint16_t trailing_trivia_count;
swiftparse_token_kind_t kind;
/// Represents the range for the node, including trivia.
swiftparse_range_t range;
} swiftparse_token_data_t;

typedef struct {
Expand All @@ -115,9 +117,6 @@ typedef struct {
swiftparse_token_data_t token_data;
swiftparse_layout_data_t layout_data;
};
/// Represents the range for the node. For a token node the range includes
/// the trivia associated with it.
swiftparse_range_t range;
/// The syntax kind. A value of '0' means this is a token node.
swiftparse_syntax_kind_t kind;
bool present;
Expand Down
3 changes: 1 addition & 2 deletions tools/libSwiftSyntaxParser/libSwiftSyntaxParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class CLibParseActions : public SyntaxParseActions {
node.token_data.trailing_trivia_count = trailingTrivia.size();
assert(node.token_data.trailing_trivia_count == trailingTrivia.size() &&
"trailing trivia count value is too large");
makeCRange(node.range, range);
makeCRange(node.token_data.range, range);
node.present = true;
}

Expand Down Expand Up @@ -186,7 +186,6 @@ class CLibParseActions : public SyntaxParseActions {
node.layout_data.nodes =
const_cast<const swiftparse_client_node_t *>(elements.data());
node.layout_data.nodes_count = elements.size();
makeCRange(node.range, range);
node.present = true;
return getNodeHandler()(&node);
}
Expand Down
14 changes: 6 additions & 8 deletions tools/swift-syntax-parser-test/swift-syntax-parser-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ NumParses("n", cl::desc("number of invocations"), cl::init(1));
namespace {
struct SPNode {
swiftparse_syntax_kind_t kind;
StringRef nodeText;

Optional<swiftparse_token_kind_t> tokKind;
StringRef leadingTriviaText;
Expand Down Expand Up @@ -111,21 +110,20 @@ static swiftparse_client_node_t
makeNode(const swiftparse_syntax_node_t *raw_node, StringRef source) {
SPNode *node = new SPNode();
node->kind = raw_node->kind;
auto range = raw_node->range;
node->nodeText = source.substr(range.offset, range.length);
if (raw_node->kind == 0) {
auto range = raw_node->token_data.range;
auto nodeText = source.substr(range.offset, range.length);
node->tokKind = raw_node->token_data.kind;
size_t leadingTriviaLen =
trivialLen(makeArrayRef(raw_node->token_data.leading_trivia,
raw_node->token_data.leading_trivia_count));
size_t trailingTriviaLen =
trivialLen(makeArrayRef(raw_node->token_data.trailing_trivia,
raw_node->token_data.trailing_trivia_count));
node->leadingTriviaText = node->nodeText.take_front(leadingTriviaLen);
node->tokenText =
node->nodeText.substr(leadingTriviaLen,
range.length-leadingTriviaLen-trailingTriviaLen);
node->trailingTriviaText = node->nodeText.take_back(trailingTriviaLen);
node->leadingTriviaText = nodeText.take_front(leadingTriviaLen);
node->tokenText = nodeText.substr(
leadingTriviaLen, range.length - leadingTriviaLen - trailingTriviaLen);
node->trailingTriviaText = nodeText.take_back(trailingTriviaLen);
} else {
for (unsigned i = 0, e = raw_node->layout_data.nodes_count; i != e; ++i) {
auto subnode = convertClientNode(raw_node->layout_data.nodes[i]);
Expand Down
3 changes: 2 additions & 1 deletion unittests/SyntaxParser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ endif()

target_link_libraries(SwiftSyntaxParserTests
PRIVATE
libSwiftSyntaxParser)
libSwiftSyntaxParser
swiftSyntax)

if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
set_target_properties(SwiftSyntaxParserTests PROPERTIES
Expand Down
120 changes: 85 additions & 35 deletions unittests/SyntaxParser/SyntaxParserTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
#include "swift-c/SyntaxParser/SwiftSyntaxParser.h"
#include "swift/Basic/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "swift/Syntax/Serialization/SyntaxSerialization.h"
#include <vector>
#include "gtest/gtest.h"

using namespace swift;
using namespace swift::syntax;
using namespace serialization;

static swiftparse_client_node_t
parse(StringRef source, swiftparse_node_handler_t node_handler,
Expand All @@ -29,63 +32,110 @@ parse(StringRef source, swiftparse_node_handler_t node_handler,
return top;
}

/// Reports whether \p child appears among the client nodes recorded in
/// \p layout_data. Entries are matched by pointer identity only.
static bool containsChild(swiftparse_layout_data_t layout_data, void *child) {
  const size_t count = layout_data.nodes_count;
  size_t index = 0;
  // Walk the entries front to back and stop at the first match.
  while (index != count) {
    if (layout_data.nodes[index] == child)
      return true;
    ++index;
  }
  return false;
}

TEST(SwiftSyntaxParserTests, IncrementalParsing) {
StringRef source1 =
"func t1() { }\n"
"func t2() { }\n";
"func t2() { }\n"
"func t3() { }\n";

StringRef source2 =
"func t1renamed() { }\n"
"func t2() { }\n";
"func t2() { }\n"
"func t3() { }\n";

swiftparse_syntax_kind_t token = getNumericValue(SyntaxKind::Token);
swiftparse_syntax_kind_t functionDecl = getNumericValue(SyntaxKind::FunctionDecl);
swiftparse_syntax_kind_t codeBlockItem = getNumericValue(SyntaxKind::CodeBlockItem);
swiftparse_syntax_kind_t codeBlockItemList = getNumericValue(SyntaxKind::CodeBlockItemList);

// Set up a bunch of node ids that we can later use.
void *t1Token = &t1Token;
void *t1Func = &t1Func;
void *t1CodeBlockItem = &t1CodeBlockItem;
void *t2Token = &t2Token;
void *t2Func = &t2Func;
void *t2CodeBlockItem = &t2CodeBlockItem;
void *t3Token = &t3Token;
void *t3Func = &t3Func;
void *t3CodeBlockItem = &t3CodeBlockItem;

// FIXME: Use the syntax kind directly instead of the serialization number.
swiftparse_syntax_kind_t codeBlockItemList = 163;
swiftparse_syntax_kind_t codeBlockItem = 92;
// Find the t1/t2/t3 tokens in the source
size_t t1TokenOffset = StringRef(source1).find("t1");
size_t t2TokenOffset = StringRef(source1).find("t2");
size_t t3TokenOffset = StringRef(source1).find("t3");

// Assign id numbers to codeBlockItem nodes and collect the ids that are
// listed as members of a codeBlockItemList node into a vector.
// When we reparse, check that we got the parser to reuse the node id from
// the previous parse.
// The length of the t2/t3 code block items
size_t t2CodeBlockItemLength = 14;
size_t t3CodeBlockItemLength = 14;

// Collect the node ids of the code block items in this list and verify that
// t2 and t3 get reused after the edit from source1 to source2.
__block std::vector<void *> codeBlockItemIds;

__block std::vector<int> nodeids;
__block int idcounter = 0;
size_t t2Offset = StringRef(source1).find("\nfunc t2");
__block int t2NodeId = 0;
__block size_t t2NodeLength = 0;
swiftparse_node_handler_t nodeHandler =
^swiftparse_client_node_t(const swiftparse_syntax_node_t *raw_node) {
if (raw_node->kind == codeBlockItem) {
int nodeid = ++idcounter;
if (raw_node->range.offset == t2Offset) {
t2NodeId = nodeid;
t2NodeLength = raw_node->range.length;
if (raw_node->kind == token) {
if (raw_node->token_data.range.offset == t1TokenOffset) {
return t1Token;
} else if (raw_node->token_data.range.offset == t2TokenOffset) {
return t2Token;
} else if (raw_node->token_data.range.offset == t3TokenOffset) {
return t3Token;
}
return (void*)(intptr_t)nodeid;
}
if (raw_node->kind == codeBlockItemList) {
} else if (raw_node->kind == functionDecl) {
if (containsChild(raw_node->layout_data, t1Token)) {
return t1Func;
} else if (containsChild(raw_node->layout_data, t2Token)) {
return t2Func;
} else if (containsChild(raw_node->layout_data, t3Token)) {
return t3Func;
}
} else if (raw_node->kind == codeBlockItem) {
if (containsChild(raw_node->layout_data, t1Func)) {
return t1CodeBlockItem;
} else if (containsChild(raw_node->layout_data, t2Func)) {
return t2CodeBlockItem;
} else if (containsChild(raw_node->layout_data, t3Func)) {
return t3CodeBlockItem;
}
} else if (raw_node->kind == codeBlockItemList) {
for (unsigned i = 0, e = raw_node->layout_data.nodes_count;
i != e; ++i) {
nodeids.push_back((int)(intptr_t)raw_node->layout_data.nodes[i]);
codeBlockItemIds.push_back(raw_node->layout_data.nodes[i]);
}
}
return nullptr;
};
parse(source1, nodeHandler, nullptr);
EXPECT_EQ(t2NodeId, 2);
ASSERT_NE(t2NodeLength, size_t(0));
EXPECT_EQ(nodeids, (std::vector<int>{1, 2}));
parse(source1, nodeHandler, /*node_lookup=*/nullptr);
ASSERT_NE(t2CodeBlockItemLength, size_t(0));
EXPECT_EQ(codeBlockItemIds, (std::vector<void *>{t1CodeBlockItem, t2CodeBlockItem, t3CodeBlockItem}));

nodeids.clear();
idcounter = 1000;
t2Offset = StringRef(source2).find("\nfunc t2");
codeBlockItemIds.clear();
size_t t2CodeBlockItemOffset = StringRef(source2).find("\nfunc t2");
size_t t3CodeBlockItemOffset = StringRef(source2).find("\nfunc t3");
swiftparse_node_lookup_t nodeLookup =
^swiftparse_lookup_result_t(size_t offset, swiftparse_syntax_kind_t kind) {
if (offset == t2Offset && kind == codeBlockItem) {
return { t2NodeLength, (void*)(intptr_t)t2NodeId };
} else {
return {0, nullptr};
if (kind == codeBlockItem) {
if (offset == t2CodeBlockItemOffset) {
return { t2CodeBlockItemLength, t2CodeBlockItem };
} else if (offset == t3CodeBlockItemOffset) {
return { t3CodeBlockItemLength, t3CodeBlockItem };
}
}
return {0, nullptr};
};

parse(source2, nodeHandler, nodeLookup);
EXPECT_EQ(nodeids, (std::vector<int>{1001, 2}));
// Assert that t2 and t3 get reused.
EXPECT_EQ(codeBlockItemIds[1], t2CodeBlockItem);
EXPECT_EQ(codeBlockItemIds[2], t3CodeBlockItem);
}