Skip to content

Commit 4cda5cb

Browse files
committed
[libSyntax] Support serializing the syntax tree as ByteTree
1 parent 4369b36 commit 4cda5cb

File tree

3 files changed

+93
-8
lines changed

3 files changed

+93
-8
lines changed

tools/SourceKit/include/SourceKit/Core/LangSupport.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ enum class SyntaxTreeTransferMode {
212212
Full
213213
};
214214

215+
/// Selects how a syntax tree is encoded when it is serialized into a
/// response: as JSON or as a ByteTree.
enum class SyntaxTreeSerializationFormat {
  JSON,
  ByteTree
};
216+
215217
class EditorConsumer {
216218
virtual void anchor();
217219
public:

tools/SourceKit/tools/sourcekitd/lib/API/Requests.cpp

Lines changed: 85 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "llvm/ADT/SmallString.h"
3737
#include "llvm/ADT/StringRef.h"
3838
#include "llvm/ADT/STLExtras.h"
39+
#include "llvm/Support/BinaryByteStream.h"
3940
#include "llvm/Support/MemoryBuffer.h"
4041
#include "llvm/Support/NativeFormatting.h"
4142
#include "llvm/Support/Path.h"
@@ -77,6 +78,8 @@ struct SKEditorConsumerOptions {
7778
bool EnableStructure = false;
7879
bool EnableDiagnostics = false;
7980
SyntaxTreeTransferMode SyntaxTransferMode = SyntaxTreeTransferMode::Off;
81+
SyntaxTreeSerializationFormat SyntaxSerializationFormat =
82+
SyntaxTreeSerializationFormat::JSON;
8083
bool SyntacticOnly = false;
8184
bool EnableSyntaxReuseInfo = false;
8285
};
@@ -263,6 +266,20 @@ static SyntaxTreeTransferMode syntaxTransferModeFromUID(sourcekitd_uid_t UID) {
263266
}
264267
}
265268

269+
/// Translates a request UID into a syntax tree serialization format.
///
/// A null UID and the JSON kind both select JSON, the ByteTree kind selects
/// ByteTree, and any other UID yields llvm::None so that the caller can
/// reject the request.
static llvm::Optional<SyntaxTreeSerializationFormat>
syntaxSerializationFormatFromUID(sourcekitd_uid_t UID) {
  // JSON doubles as the default when no format UID was provided.
  if (UID == nullptr || UID == KindSyntaxTreeSerializationJSON)
    return SyntaxTreeSerializationFormat::JSON;

  if (UID == KindSyntaxTreeSerializationByteTree)
    return SyntaxTreeSerializationFormat::ByteTree;

  // Unrecognized format.
  return llvm::None;
}
282+
266283
static void handleRequestImpl(sourcekitd_object_t Req,
267284
ResponseReceiver Receiver);
268285

@@ -451,6 +468,7 @@ void handleRequestImpl(sourcekitd_object_t ReqObj, ResponseReceiver Rec) {
451468
int64_t EnableDiagnostics = true;
452469
Req.getInt64(KeyEnableDiagnostics, EnableDiagnostics, /*isOptional=*/true);
453470
auto TransferModeUID = Req.getUID(KeySyntaxTreeTransferMode);
471+
auto SerializationFormatUID = Req.getUID(KeySyntaxTreeSerializationFormat);
454472
int64_t SyntacticOnly = false;
455473
Req.getInt64(KeySyntacticOnly, SyntacticOnly, /*isOptional=*/true);
456474
int64_t EnableSyntaxReuseInfo = false;
@@ -462,6 +480,11 @@ void handleRequestImpl(sourcekitd_object_t ReqObj, ResponseReceiver Rec) {
462480
Opts.EnableStructure = EnableStructure;
463481
Opts.EnableDiagnostics = EnableDiagnostics;
464482
Opts.SyntaxTransferMode = syntaxTransferModeFromUID(TransferModeUID);
483+
auto SyntaxSerializationFormat =
484+
syntaxSerializationFormatFromUID(SerializationFormatUID);
485+
if (!SyntaxSerializationFormat)
486+
return Rec(createErrorRequestFailed("Invalid serialization format"));
487+
Opts.SyntaxSerializationFormat = SyntaxSerializationFormat.getValue();
465488
Opts.SyntacticOnly = SyntacticOnly;
466489
Opts.EnableSyntaxReuseInfo = EnableSyntaxReuseInfo;
467490
return Rec(editorOpen(*Name, InputBuf.get(), Opts, Args));
@@ -494,18 +517,24 @@ void handleRequestImpl(sourcekitd_object_t ReqObj, ResponseReceiver Rec) {
494517
Req.getInt64(KeyEnableStructure, EnableStructure, /*isOptional=*/true);
495518
int64_t EnableDiagnostics = true;
496519
Req.getInt64(KeyEnableDiagnostics, EnableDiagnostics, /*isOptional=*/true);
497-
auto TransferModeUID = Req.getUID(KeySyntaxTreeTransferMode);
498520
int64_t SyntacticOnly = false;
499521
Req.getInt64(KeySyntacticOnly, SyntacticOnly, /*isOptional=*/true);
500522
int64_t EnableSyntaxReuseInfo = false;
501523
Req.getInt64(KeyEnableSyntaxReuseRegions, EnableSyntaxReuseInfo,
502524
/*isOptional=*/true);
525+
auto TransferModeUID = Req.getUID(KeySyntaxTreeTransferMode);
526+
auto SerializationFormatUID = Req.getUID(KeySyntaxTreeSerializationFormat);
503527

504528
SKEditorConsumerOptions Opts;
505529
Opts.EnableSyntaxMap = EnableSyntaxMap;
506530
Opts.EnableStructure = EnableStructure;
507531
Opts.EnableDiagnostics = EnableDiagnostics;
508532
Opts.SyntaxTransferMode = syntaxTransferModeFromUID(TransferModeUID);
533+
auto SyntaxSerializationFormat =
534+
syntaxSerializationFormatFromUID(SerializationFormatUID);
535+
if (!SyntaxSerializationFormat)
536+
return Rec(createErrorRequestFailed("Invalid serialization format"));
537+
Opts.SyntaxSerializationFormat = SyntaxSerializationFormat.getValue();
509538
Opts.EnableSyntaxReuseInfo = EnableSyntaxReuseInfo;
510539
Opts.SyntacticOnly = SyntacticOnly;
511540
Opts.EnableSyntaxReuseInfo = EnableSyntaxReuseInfo;
@@ -2420,11 +2449,39 @@ void SKEditorConsumer::handleSourceText(StringRef Text) {
24202449
Dict.set(KeySourceText, Text);
24212450
}
24222451

2423-
void serializeSyntaxTreeAsJson(
2452+
void serializeSyntaxTreeAsByteTree(
24242453
const swift::syntax::SourceFileSyntax &SyntaxTree,
24252454
std::unordered_set<unsigned> &ReusedNodeIds,
24262455
ResponseBuilder::Dictionary &Dict) {
24272456
auto StartClock = clock();
2457+
// Serialize the syntax tree as a ByteTree
2458+
llvm::AppendingBinaryByteStream Stream(llvm::support::endianness::little);
2459+
llvm::BinaryStreamWriter Writer(Stream);
2460+
swift::byteTree::ByteTreeWriter::write(/*ProtocolVersion=*/1, Writer,
2461+
*SyntaxTree.getRaw());
2462+
2463+
std::unique_ptr<llvm::WritableMemoryBuffer> Buf =
2464+
llvm::WritableMemoryBuffer::getNewUninitMemBuffer(Stream.data().size());
2465+
memcpy(Buf->getBufferStart(), Stream.data().data(), Stream.data().size());
2466+
2467+
Dict.setCustomBuffer(KeySerializedSyntaxTree, CustomBufferKind::RawData,
2468+
std::move(Buf));
2469+
2470+
auto EndClock = clock();
2471+
LOG_SECTION("incrParse Performance", InfoLowPrio) {
2472+
Log->getOS() << "Serialized " << Stream.data().size()
2473+
<< " bytes as ByteTree in ";
2474+
auto Seconds = (double)(EndClock - StartClock) * 1000 / CLOCKS_PER_SEC;
2475+
llvm::write_double(Log->getOS(), Seconds, llvm::FloatStyle::Fixed, 2);
2476+
Log->getOS() << "ms";
2477+
}
2478+
}
2479+
2480+
void serializeSyntaxTreeAsJson(
2481+
const swift::syntax::SourceFileSyntax &SyntaxTree,
2482+
std::unordered_set<unsigned> ReusedNodeIds,
2483+
ResponseBuilder::Dictionary &Dict) {
2484+
auto StartClock = clock();
24282485
// 4096 is a heuristic buffer size that appears to usually be able to fit an
24292486
// incremental syntax tree
24302487
size_t ReserveBufferSize = 4096;
@@ -2443,21 +2500,41 @@ void serializeSyntaxTreeAsJson(
24432500

24442501
auto EndClock = clock();
24452502
LOG_SECTION("incrParse Performance", InfoLowPrio) {
2446-
Log->getOS() << "Serialized " << SyntaxTreeString.size() << " bytes in ";
2447-
llvm::write_double(Log->getOS(),
2448-
(double)(EndClock - StartClock) * 1000 / CLOCKS_PER_SEC,
2449-
llvm::FloatStyle::Fixed, 2);
2503+
Log->getOS() << "Serialized " << SyntaxTreeString.size()
2504+
<< " bytes as JSON in ";
2505+
auto Seconds = (double)(EndClock - StartClock) * 1000 / CLOCKS_PER_SEC;
2506+
llvm::write_double(Log->getOS(), Seconds, llvm::FloatStyle::Fixed, 2);
24502507
Log->getOS() << "ms";
24512508
}
24522509
}
24532510

24542511
void SKEditorConsumer::handleSyntaxTree(
24552512
const swift::syntax::SourceFileSyntax &SyntaxTree,
24562513
std::unordered_set<unsigned> &ReusedNodeIds) {
2457-
if (Opts.SyntaxTransferMode == SyntaxTreeTransferMode::Off)
2514+
2515+
std::unordered_set<unsigned> OmitNodes;
2516+
switch (Opts.SyntaxTransferMode) {
2517+
case SourceKit::SyntaxTreeTransferMode::Off:
2518+
// Don't serialize the tree at all
24582519
return;
2520+
case SourceKit::SyntaxTreeTransferMode::Full:
2521+
// Serialize the tree without omitting any nodes
2522+
OmitNodes = {};
2523+
break;
2524+
case SourceKit::SyntaxTreeTransferMode::Incremental:
2525+
// Serialize the tree and omit all nodes that have been reused
2526+
OmitNodes = ReusedNodeIds;
2527+
break;
2528+
}
24592529

2460-
serializeSyntaxTreeAsJson(SyntaxTree, ReusedNodeIds, Dict);
2530+
switch (Opts.SyntaxSerializationFormat) {
2531+
case SourceKit::SyntaxTreeSerializationFormat::JSON:
2532+
serializeSyntaxTreeAsJson(SyntaxTree, OmitNodes, Dict);
2533+
break;
2534+
case SourceKit::SyntaxTreeSerializationFormat::ByteTree:
2535+
serializeSyntaxTreeAsByteTree(SyntaxTree, OmitNodes, Dict);
2536+
break;
2537+
}
24612538
}
24622539

24632540
void SKEditorConsumer::handleSyntaxReuseRegions(

utils/gyb_sourcekit_support/UIDs.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ def __init__(self, internal_name, external_name):
5252
KEY('EnableSyntaxReuseRegions', 'key.enablesyntaxreuseregions'),
5353
KEY('EnableSyntaxMap', 'key.enablesyntaxmap'),
5454
KEY('SyntaxTreeTransferMode', 'key.syntaxtreetransfermode'),
55+
KEY('SyntaxTreeSerializationFormat',
56+
'key.syntax_tree_serialization_format'),
5557
KEY('EnableStructure', 'key.enablesubstructure'),
5658
KEY('Description', 'key.description'),
5759
KEY('TypeName', 'key.typename'),
@@ -413,4 +415,8 @@ def __init__(self, internal_name, external_name):
413415
KIND('SyntaxTreeOff', 'source.syntaxtree.transfer.off'),
414416
KIND('SyntaxTreeIncremental', 'source.syntaxtree.transfer.incremental'),
415417
KIND('SyntaxTreeFull', 'source.syntaxtree.transfer.full'),
418+
KIND('SyntaxTreeSerializationJSON',
419+
'source.syntaxtree.serialization.format.json'),
420+
KIND('SyntaxTreeSerializationByteTree',
421+
'source.syntaxtree.serialization.format.bytetree'),
416422
]

0 commit comments

Comments
 (0)