Skip to content

[libSyntax] Support serializing the syntax tree as ByteTree #18690

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tools/SourceKit/include/SourceKit/Core/LangSupport.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ enum class SyntaxTreeTransferMode {
Full
};

/// Selects the encoding used when the syntax tree is serialized into a
/// response.
enum class SyntaxTreeSerializationFormat {
  /// Serialize the syntax tree as JSON.
  JSON,
  /// Serialize the syntax tree as a ByteTree.
  ByteTree
};

class EditorConsumer {
virtual void anchor();
public:
Expand Down
93 changes: 85 additions & 8 deletions tools/SourceKit/tools/sourcekitd/lib/API/Requests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Path.h"
Expand Down Expand Up @@ -77,6 +78,8 @@ struct SKEditorConsumerOptions {
bool EnableStructure = false;
bool EnableDiagnostics = false;
SyntaxTreeTransferMode SyntaxTransferMode = SyntaxTreeTransferMode::Off;
SyntaxTreeSerializationFormat SyntaxSerializationFormat =
SyntaxTreeSerializationFormat::JSON;
bool SyntacticOnly = false;
bool EnableSyntaxReuseInfo = false;
};
Expand Down Expand Up @@ -263,6 +266,20 @@ static SyntaxTreeTransferMode syntaxTransferModeFromUID(sourcekitd_uid_t UID) {
}
}

/// Maps the UID supplied for the serialization-format request key to the
/// corresponding \c SyntaxTreeSerializationFormat.
///
/// \returns JSON when \p UID is null (no format given) or names the JSON
/// format, ByteTree when it names the ByteTree format, and \c llvm::None for
/// any other UID.
static llvm::Optional<SyntaxTreeSerializationFormat>
syntaxSerializationFormatFromUID(sourcekitd_uid_t UID) {
  // A null UID falls back to the default format, which is JSON.
  if (UID == nullptr || UID == KindSyntaxTreeSerializationJSON)
    return SyntaxTreeSerializationFormat::JSON;

  if (UID == KindSyntaxTreeSerializationByteTree)
    return SyntaxTreeSerializationFormat::ByteTree;

  // Unrecognized format UID.
  return llvm::None;
}

static void handleRequestImpl(sourcekitd_object_t Req,
ResponseReceiver Receiver);

Expand Down Expand Up @@ -451,6 +468,7 @@ void handleRequestImpl(sourcekitd_object_t ReqObj, ResponseReceiver Rec) {
int64_t EnableDiagnostics = true;
Req.getInt64(KeyEnableDiagnostics, EnableDiagnostics, /*isOptional=*/true);
auto TransferModeUID = Req.getUID(KeySyntaxTreeTransferMode);
auto SerializationFormatUID = Req.getUID(KeySyntaxTreeSerializationFormat);
int64_t SyntacticOnly = false;
Req.getInt64(KeySyntacticOnly, SyntacticOnly, /*isOptional=*/true);
int64_t EnableSyntaxReuseInfo = false;
Expand All @@ -462,6 +480,11 @@ void handleRequestImpl(sourcekitd_object_t ReqObj, ResponseReceiver Rec) {
Opts.EnableStructure = EnableStructure;
Opts.EnableDiagnostics = EnableDiagnostics;
Opts.SyntaxTransferMode = syntaxTransferModeFromUID(TransferModeUID);
auto SyntaxSerializationFormat =
syntaxSerializationFormatFromUID(SerializationFormatUID);
if (!SyntaxSerializationFormat)
return Rec(createErrorRequestFailed("Invalid serialization format"));
Opts.SyntaxSerializationFormat = SyntaxSerializationFormat.getValue();
Opts.SyntacticOnly = SyntacticOnly;
Opts.EnableSyntaxReuseInfo = EnableSyntaxReuseInfo;
return Rec(editorOpen(*Name, InputBuf.get(), Opts, Args));
Expand Down Expand Up @@ -494,18 +517,24 @@ void handleRequestImpl(sourcekitd_object_t ReqObj, ResponseReceiver Rec) {
Req.getInt64(KeyEnableStructure, EnableStructure, /*isOptional=*/true);
int64_t EnableDiagnostics = true;
Req.getInt64(KeyEnableDiagnostics, EnableDiagnostics, /*isOptional=*/true);
auto TransferModeUID = Req.getUID(KeySyntaxTreeTransferMode);
int64_t SyntacticOnly = false;
Req.getInt64(KeySyntacticOnly, SyntacticOnly, /*isOptional=*/true);
int64_t EnableSyntaxReuseInfo = false;
Req.getInt64(KeyEnableSyntaxReuseRegions, EnableSyntaxReuseInfo,
/*isOptional=*/true);
auto TransferModeUID = Req.getUID(KeySyntaxTreeTransferMode);
auto SerializationFormatUID = Req.getUID(KeySyntaxTreeSerializationFormat);

SKEditorConsumerOptions Opts;
Opts.EnableSyntaxMap = EnableSyntaxMap;
Opts.EnableStructure = EnableStructure;
Opts.EnableDiagnostics = EnableDiagnostics;
Opts.SyntaxTransferMode = syntaxTransferModeFromUID(TransferModeUID);
auto SyntaxSerializationFormat =
syntaxSerializationFormatFromUID(SerializationFormatUID);
if (!SyntaxSerializationFormat)
return Rec(createErrorRequestFailed("Invalid serialization format"));
Opts.SyntaxSerializationFormat = SyntaxSerializationFormat.getValue();
Opts.EnableSyntaxReuseInfo = EnableSyntaxReuseInfo;
Opts.SyntacticOnly = SyntacticOnly;
Opts.EnableSyntaxReuseInfo = EnableSyntaxReuseInfo;
Expand Down Expand Up @@ -2420,11 +2449,39 @@ void SKEditorConsumer::handleSourceText(StringRef Text) {
Dict.set(KeySourceText, Text);
}

void serializeSyntaxTreeAsJson(
/// Serializes \p SyntaxTree with the ByteTree writer and stores the resulting
/// bytes in \p Dict as a raw-data custom buffer under
/// \c KeySerializedSyntaxTree.
///
/// \p ReusedNodeIds is accepted but not read in this body: the raw tree is
/// always handed to the writer as a whole.
void serializeSyntaxTreeAsByteTree(
    const swift::syntax::SourceFileSyntax &SyntaxTree,
    std::unordered_set<unsigned> &ReusedNodeIds,
    ResponseBuilder::Dictionary &Dict) {
  auto SerializationStart = clock();

  // Encode the raw syntax tree into an appending little-endian byte stream.
  llvm::AppendingBinaryByteStream ByteStream(
      llvm::support::endianness::little);
  llvm::BinaryStreamWriter StreamWriter(ByteStream);
  swift::byteTree::ByteTreeWriter::write(/*ProtocolVersion=*/1, StreamWriter,
                                         *SyntaxTree.getRaw());

  // Copy the encoded bytes into a freshly allocated buffer that is then moved
  // into the response dictionary.
  auto EncodedBytes = ByteStream.data();
  auto EncodedSize = EncodedBytes.size();
  std::unique_ptr<llvm::WritableMemoryBuffer> ResponseBuf =
      llvm::WritableMemoryBuffer::getNewUninitMemBuffer(EncodedSize);
  memcpy(ResponseBuf->getBufferStart(), EncodedBytes.data(), EncodedSize);

  Dict.setCustomBuffer(KeySerializedSyntaxTree, CustomBufferKind::RawData,
                       std::move(ResponseBuf));

  auto SerializationEnd = clock();
  LOG_SECTION("incrParse Performance", InfoLowPrio) {
    // Clock ticks converted to milliseconds (hence the "ms" suffix below).
    auto ElapsedMs = static_cast<double>(SerializationEnd - SerializationStart) *
                     1000 / CLOCKS_PER_SEC;
    Log->getOS() << "Serialized " << EncodedSize << " bytes as ByteTree in ";
    llvm::write_double(Log->getOS(), ElapsedMs, llvm::FloatStyle::Fixed, 2);
    Log->getOS() << "ms";
  }
}

void serializeSyntaxTreeAsJson(
const swift::syntax::SourceFileSyntax &SyntaxTree,
std::unordered_set<unsigned> ReusedNodeIds,
ResponseBuilder::Dictionary &Dict) {
auto StartClock = clock();
// 4096 is a heuristic buffer size that appears to usually be able to fit an
// incremental syntax tree
size_t ReserveBufferSize = 4096;
Expand All @@ -2443,21 +2500,41 @@ void serializeSyntaxTreeAsJson(

auto EndClock = clock();
LOG_SECTION("incrParse Performance", InfoLowPrio) {
Log->getOS() << "Serialized " << SyntaxTreeString.size() << " bytes in ";
llvm::write_double(Log->getOS(),
(double)(EndClock - StartClock) * 1000 / CLOCKS_PER_SEC,
llvm::FloatStyle::Fixed, 2);
Log->getOS() << "Serialized " << SyntaxTreeString.size()
<< " bytes as JSON in ";
auto Seconds = (double)(EndClock - StartClock) * 1000 / CLOCKS_PER_SEC;
llvm::write_double(Log->getOS(), Seconds, llvm::FloatStyle::Fixed, 2);
Log->getOS() << "ms";
}
}

/// Serializes \p SyntaxTree into the response dictionary according to the
/// transfer mode and serialization format stored in \c Opts.
///
/// \param SyntaxTree The syntax tree to serialize.
/// \param ReusedNodeIds IDs of reused nodes; these are the nodes omitted when
/// the transfer mode is Incremental.
void SKEditorConsumer::handleSyntaxTree(
const swift::syntax::SourceFileSyntax &SyntaxTree,
std::unordered_set<unsigned> &ReusedNodeIds) {
// NOTE(review): the following `if` has no statement of its own here and looks
// like the pre-change guard retained by the diff view; the switch below
// already returns for the Off mode — confirm against the merged file.
if (Opts.SyntaxTransferMode == SyntaxTreeTransferMode::Off)

// Set of node IDs passed to the serializer as the nodes to omit.
std::unordered_set<unsigned> OmitNodes;
switch (Opts.SyntaxTransferMode) {
case SourceKit::SyntaxTreeTransferMode::Off:
// Don't serialize the tree at all
return;
case SourceKit::SyntaxTreeTransferMode::Full:
// Serialize the tree without omitting any nodes
OmitNodes = {};
break;
case SourceKit::SyntaxTreeTransferMode::Incremental:
// Serialize the tree and omit all nodes that have been reused
OmitNodes = ReusedNodeIds;
break;
}

// NOTE(review): this unconditional JSON call looks like the pre-change line
// retained by the diff view; the switch below dispatches on the requested
// format — confirm against the merged file.
serializeSyntaxTreeAsJson(SyntaxTree, ReusedNodeIds, Dict);
// Dispatch to the serializer matching the requested format.
switch (Opts.SyntaxSerializationFormat) {
case SourceKit::SyntaxTreeSerializationFormat::JSON:
serializeSyntaxTreeAsJson(SyntaxTree, OmitNodes, Dict);
break;
case SourceKit::SyntaxTreeSerializationFormat::ByteTree:
serializeSyntaxTreeAsByteTree(SyntaxTree, OmitNodes, Dict);
break;
}
}

void SKEditorConsumer::handleSyntaxReuseRegions(
Expand Down
6 changes: 6 additions & 0 deletions utils/gyb_sourcekit_support/UIDs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def __init__(self, internal_name, external_name):
KEY('EnableSyntaxReuseRegions', 'key.enablesyntaxreuseregions'),
KEY('EnableSyntaxMap', 'key.enablesyntaxmap'),
KEY('SyntaxTreeTransferMode', 'key.syntaxtreetransfermode'),
KEY('SyntaxTreeSerializationFormat',
'key.syntax_tree_serialization_format'),
KEY('EnableStructure', 'key.enablesubstructure'),
KEY('Description', 'key.description'),
KEY('TypeName', 'key.typename'),
Expand Down Expand Up @@ -413,4 +415,8 @@ def __init__(self, internal_name, external_name):
KIND('SyntaxTreeOff', 'source.syntaxtree.transfer.off'),
KIND('SyntaxTreeIncremental', 'source.syntaxtree.transfer.incremental'),
KIND('SyntaxTreeFull', 'source.syntaxtree.transfer.full'),
KIND('SyntaxTreeSerializationJSON',
'source.syntaxtree.serialization.format.json'),
KIND('SyntaxTreeSerializationByteTree',
'source.syntaxtree.serialization.format.bytetree'),
]