Skip to content

Commit 9d6ff6d

Browse files
authored
Merge pull request #16340 from ahoppen/incremental-syntax-parsing
[libSyntax] Incremental Syntax Parsing
2 parents b5180dc + 4e44e68 commit 9d6ff6d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1547
-166
lines changed

include/swift/AST/Module.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "swift/Basic/OptionSet.h"
2929
#include "swift/Basic/STLExtras.h"
3030
#include "swift/Basic/SourceLoc.h"
31+
#include "swift/Parse/SyntaxParsingCache.h"
3132
#include "llvm/ADT/ArrayRef.h"
3233
#include "llvm/ADT/DenseSet.h"
3334
#include "llvm/ADT/STLExtras.h"
@@ -838,6 +839,10 @@ class SourceFile final : public FileUnit {
838839
/// The list of top-level declarations in the source file.
839840
std::vector<Decl*> Decls;
840841

842+
/// A cache of syntax nodes that can be reused when creating the syntax tree
843+
/// for this file.
844+
SyntaxParsingCache *SyntaxParsingCache = nullptr;
845+
841846
/// The list of local type declarations in the source file.
842847
llvm::SetVector<TypeDecl *> LocalTypeDecls;
843848

include/swift/Basic/LangOptions.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,9 @@ namespace swift {
261261
/// Whether collect tokens during parsing for syntax coloring.
262262
bool CollectParsedToken = false;
263263

264-
/// Whether to parse syntax tree.
264+
/// Whether to parse syntax tree. If the syntax tree is built, the generated
265+
/// AST may not be correct when syntax nodes are reused as part of
266+
/// incremental parsing.
265267
bool BuildSyntaxTree = false;
266268

267269
/// Whether to verify the parsed syntax tree and emit related diagnostics.

include/swift/Basic/SourceLoc.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ class SourceLoc {
6868
void print(raw_ostream &OS, const SourceManager &SM,
6969
unsigned &LastBufferID) const;
7070

71-
void printLineAndColumn(raw_ostream &OS, const SourceManager &SM) const;
71+
void printLineAndColumn(raw_ostream &OS, const SourceManager &SM,
72+
unsigned BufferID = 0) const;
7273

7374
void print(raw_ostream &OS, const SourceManager &SM) const {
7475
unsigned Tmp = ~0U;

include/swift/Frontend/Frontend.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "swift/Migrator/MigratorOptions.h"
3535
#include "swift/Parse/CodeCompletionCallbacks.h"
3636
#include "swift/Parse/Parser.h"
37+
#include "swift/Parse/SyntaxParsingCache.h"
3738
#include "swift/Sema/SourceLoader.h"
3839
#include "swift/Serialization/Validation.h"
3940
#include "swift/Subsystems.h"
@@ -68,6 +69,9 @@ class CompilerInvocation {
6869
MigratorOptions MigratorOpts;
6970
SILOptions SILOpts;
7071
IRGenOptions IRGenOpts;
72+
/// The \c SyntaxParsingCache to use when parsing the main file of this
73+
/// invocation
74+
SyntaxParsingCache *MainFileSyntaxParsingCache = nullptr;
7175

7276
llvm::MemoryBuffer *CodeCompletionBuffer = nullptr;
7377

@@ -217,6 +221,14 @@ class CompilerInvocation {
217221
IRGenOptions &getIRGenOptions() { return IRGenOpts; }
218222
const IRGenOptions &getIRGenOptions() const { return IRGenOpts; }
219223

224+
/// Set the \c SyntaxParsingCache to use when parsing the main file of this
/// invocation. Only the pointer is stored; passing \c nullptr clears it.
void setMainFileSyntaxParsingCache(SyntaxParsingCache *Cache) {
225+
MainFileSyntaxParsingCache = Cache;
226+
}
227+
228+
/// Return the \c SyntaxParsingCache previously stored for the main file of
/// this invocation, or \c nullptr (the member's default) if none was set.
SyntaxParsingCache *getMainFileSyntaxParsingCache() const {
229+
return MainFileSyntaxParsingCache;
230+
}
231+
220232
void setParseStdlib() {
221233
FrontendOpts.ParseStdlib = true;
222234
}

include/swift/Parse/Lexer.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,15 @@ class Lexer {
192192
lex(Result, LeadingTrivia, TrailingTrivia);
193193
}
194194

195+
/// Reset the lexer's buffer pointer to \p Offset bytes after the buffer
196+
/// start.
197+
void resetToOffset(size_t Offset) {
198+
assert(BufferStart + Offset <= BufferEnd && "Offset after buffer end");
199+
200+
CurPtr = BufferStart + Offset;
201+
lexImpl();
202+
}
203+
195204
bool isKeepingComments() const {
196205
return RetainComments == CommentRetentionMode::ReturnAsTokens;
197206
}

include/swift/Parse/Parser.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,13 @@ class Parser {
521521
/// \brief Skip until the next '#else', '#endif' or until eof.
522522
void skipUntilConditionalBlockClose();
523523

524+
/// If the parser is generating only a syntax tree, try loading the current
525+
/// node from a previously generated syntax tree.
526+
/// Returns \c true if the node has been loaded and inserted into the current
527+
/// syntax tree. In this case the parser should behave as if the node has
528+
/// successfully been created.
529+
bool loadCurrentSyntaxNodeFromCache();
530+
524531
/// Parse an #endif.
525532
bool parseEndIfDirective(SourceLoc &Loc);
526533

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
//===----------- SyntaxParsingCache.h -================----------*- C++ -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef SWIFT_PARSE_SYNTAXPARSINGCACHE_H
14+
#define SWIFT_PARSE_SYNTAXPARSINGCACHE_H
15+
16+
#include "swift/Syntax/SyntaxNodes.h"
17+
#include "llvm/Support/FileSystem.h"
18+
#include "llvm/Support/raw_ostream.h"
19+
20+
namespace {
21+
22+
/// A single edit to the original source file in which a continuous range of
23+
/// characters have been replaced by a new string
24+
struct SourceEdit {
  /// The byte offset at which the replaced range starts.
  size_t Start;

  /// The byte offset at which the replaced range ends. The replaced range
  /// has length \c End - \c Start.
  size_t End;

  /// The length of the string that replaced the range described above.
  size_t ReplacementLength;

  /// The length of the range that has been replaced.
  size_t originalLength() const { return End - Start; }

  /// Check if the characters replaced by this edit fall into the given range
  /// or are directly adjacent to it.
  ///
  /// \param RangeStart Byte offset at which the range starts.
  /// \param RangeEnd Byte offset at which the range ends.
  /// \returns \c true if the edit overlaps the range or shares a boundary
  /// with it (the edit ends exactly at \p RangeStart or starts exactly at
  /// \p RangeEnd), \c false otherwise.
  bool intersectsOrTouchesRange(size_t RangeStart, size_t RangeEnd) const {
    // The comparisons are inclusive on purpose: an edit that merely touches
    // the range (including a zero-length insertion on its boundary) must be
    // reported, as promised by the "OrTouches" part of the contract.
    return End >= RangeStart && Start <= RangeEnd;
  }
};
43+
44+
} // anonymous namespace
45+
46+
namespace swift {
47+
48+
using namespace swift::syntax;
49+
50+
/// Holds a previously built syntax tree together with the edits made to the
/// source since then, so that syntax nodes can be looked up for reuse when
/// the edited file is parsed.
class SyntaxParsingCache {
51+
/// The syntax tree prior to the edits.
52+
SourceFileSyntax OldSyntaxTree;
53+
54+
/// The edits that were made from the source file that created this cache to
55+
/// the source file that is now parsed incrementally.
56+
llvm::SmallVector<SourceEdit, 4> Edits;
57+
58+
/// Whether or not information about reused nodes shall be recorded in
59+
/// \c ReusedRanges. Off by default.
60+
bool RecordReuseInformation = false;
61+
62+
/// If \c RecordReuseInformation is set, the buffer offsets of ranges that
63+
/// have been successfully looked up in this cache are stored here.
64+
std::vector<std::pair<unsigned, unsigned>> ReusedRanges;
65+
66+
public:
67+
SyntaxParsingCache(SourceFileSyntax OldSyntaxTree)
68+
: OldSyntaxTree(OldSyntaxTree) {}
69+
70+
/// Add an edit that transformed the source file which created this cache into
71+
/// the source file that is now being parsed incrementally. The order in which
72+
/// the edits are added using this method needs to be the same order in which
73+
/// the edits were applied to the source file.
74+
void addEdit(size_t Start, size_t End, size_t ReplacementLength) {
75+
Edits.push_back({Start, End, ReplacementLength});
76+
}
77+
78+
/// Check if a syntax node of the given kind at the given position can be
79+
/// reused for a new syntax tree.
80+
llvm::Optional<Syntax> lookUp(size_t NewPosition, SyntaxKind Kind);
81+
82+
/// Turn recording of reused ranges on. There is no way to turn it off again.
83+
void setRecordReuseInformation() { RecordReuseInformation = true; }
84+
85+
/// Return a copy of the ranges of the new source file that have been
86+
/// successfully looked up in this cache, as (start, end) pairs of byte
87+
/// offsets in the post-edit file.
88+
std::vector<std::pair<unsigned, unsigned>> getReusedRanges() const {
89+
return ReusedRanges;
90+
}
91+
92+
private:
93+
llvm::Optional<Syntax> lookUpFrom(const Syntax &Node, size_t Position,
94+
SyntaxKind Kind);
95+
96+
bool nodeCanBeReused(const Syntax &Node, size_t Position,
97+
SyntaxKind Kind) const;
98+
};
99+
100+
} // namespace swift
101+
102+
#endif // SWIFT_PARSE_SYNTAXPARSINGCACHE_H

include/swift/Parse/SyntaxParsingContext.h

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
namespace swift {
2222
class SourceFile;
23+
class SyntaxParsingCache;
2324
class Token;
2425
class DiagnosticEngine;
2526

@@ -74,9 +75,17 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
7475
// Storage for Collected parts.
7576
std::vector<RC<RawSyntax>> Storage;
7677

78+
SyntaxArena &Arena;
79+
80+
/// A cache of nodes that can be reused when creating the current syntax
81+
/// tree
82+
SyntaxParsingCache *SyntaxCache = nullptr;
83+
7784
RootContextData(SourceFile &SF, DiagnosticEngine &Diags,
78-
SourceManager &SourceMgr, unsigned BufferID)
79-
: SF(SF), Diags(Diags), SourceMgr(SourceMgr), BufferID(BufferID) {}
85+
SourceManager &SourceMgr, unsigned BufferID,
86+
SyntaxArena &Arena, SyntaxParsingCache *SyntaxCache)
87+
: SF(SF), Diags(Diags), SourceMgr(SourceMgr), BufferID(BufferID),
88+
Arena(Arena), SyntaxCache(SyntaxCache) {}
8089
};
8190

8291
private:
@@ -97,6 +106,9 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
97106
// Discard all parts in the context.
98107
Discard,
99108

109+
// The node has been loaded from the cache and all parts shall be discarded.
110+
LoadedFromCache,
111+
100112
// Construct SourceFile syntax to the specified SF.
101113
Root,
102114

@@ -112,9 +124,7 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
112124
// Reference to the
113125
SyntaxParsingContext *&CtxtHolder;
114126

115-
SyntaxArena &Arena;
116-
117-
std::vector<RC<RawSyntax>> &Storage;
127+
RootContextData *RootData;
118128

119129
// Offset into 'Storage' from which this context owns the parts.
120130
const size_t Offset;
@@ -138,7 +148,7 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
138148
void createNodeInPlace(SyntaxKind Kind, size_t N);
139149

140150
ArrayRef<RC<RawSyntax>> getParts() const {
141-
return makeArrayRef(Storage).drop_front(Offset);
151+
return makeArrayRef(getStorage()).drop_front(Offset);
142152
}
143153

144154
RC<RawSyntax> makeUnknownSyntax(SyntaxKind Kind,
@@ -154,11 +164,12 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
154164
/// Designated constructor for child context.
155165
SyntaxParsingContext(SyntaxParsingContext *&CtxtHolder)
156166
: RootDataOrParent(CtxtHolder), CtxtHolder(CtxtHolder),
157-
Arena(CtxtHolder->Arena),
158-
Storage(CtxtHolder->Storage), Offset(Storage.size()),
167+
RootData(CtxtHolder->RootData), Offset(RootData->Storage.size()),
159168
Enabled(CtxtHolder->isEnabled()) {
160169
assert(CtxtHolder->isTopOfContextStack() &&
161170
"SyntaxParsingContext cannot have multiple children");
171+
assert(CtxtHolder->Mode != AccumulationMode::LoadedFromCache &&
172+
"Cannot create child context for a node loaded from the cache");
162173
CtxtHolder = this;
163174
}
164175

@@ -174,20 +185,41 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
174185

175186
~SyntaxParsingContext();
176187

188+
/// Try loading the current node from the \c SyntaxParsingCache by looking up
189+
/// if an unmodified node exists at \p LexerOffset of the same kind. If a node
190+
/// is found, replace the node that is currently being constructed by the
191+
/// parsing context with the node from the cache and return the number of
192+
/// bytes the loaded node took up in the original source. The lexer should
193+
/// pretend it has read these bytes and continue from the advanced offset.
194+
/// If nothing is found \c 0 is returned.
195+
size_t loadFromCache(size_t LexerOffset);
196+
177197
void disable() { Enabled = false; }
178198
bool isEnabled() const { return Enabled; }
179199
bool isRoot() const { return RootDataOrParent.is<RootContextData*>(); }
180200
bool isTopOfContextStack() const { return this == CtxtHolder; }
181201

182-
SyntaxParsingContext *getParent() {
202+
SyntaxParsingContext *getParent() const {
183203
return RootDataOrParent.get<SyntaxParsingContext*>();
184204
}
185205

186-
RootContextData &getRootData() {
187-
return *getRoot()->RootDataOrParent.get<RootContextData*>();
206+
RootContextData *getRootData() { return RootData; }
207+
208+
const RootContextData *getRootData() const { return RootData; }
209+
210+
std::vector<RC<RawSyntax>> &getStorage() { return getRootData()->Storage; }
211+
212+
const std::vector<RC<RawSyntax>> &getStorage() const {
213+
return getRootData()->Storage;
214+
}
215+
216+
SyntaxParsingCache *getSyntaxParsingCache() const {
217+
return getRootData()->SyntaxCache;
188218
}
189219

190-
SyntaxParsingContext *getRoot();
220+
SyntaxArena &getArena() const { return getRootData()->Arena; }
221+
222+
const SyntaxParsingContext *getRoot() const;
191223

192224
/// Add RawSyntax to the parts.
193225
void addRawSyntax(RC<RawSyntax> Raw);
@@ -201,6 +233,7 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
201233

202234
template<typename SyntaxNode>
203235
llvm::Optional<SyntaxNode> popIf() {
236+
auto &Storage = getStorage();
204237
assert(Storage.size() > Offset);
205238
if (auto Node = make<Syntax>(Storage.back()).getAs<SyntaxNode>()) {
206239
Storage.pop_back();
@@ -210,6 +243,7 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
210243
}
211244

212245
TokenSyntax popToken() {
246+
auto &Storage = getStorage();
213247
assert(Storage.size() > Offset);
214248
assert(Storage.back()->getKind() == SyntaxKind::Token);
215249
auto Node = make<TokenSyntax>(std::move(Storage.back()));
@@ -263,6 +297,10 @@ class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
263297
/// Make a missing node corresponding to the given node kind, and
264298
/// push this node into the context.
265299
void synthesize(SyntaxKind Kind);
300+
301+
/// Dump the nodes that are in the storage stack of the SyntaxParsingContext
302+
LLVM_ATTRIBUTE_DEPRECATED(void dumpStorage() const LLVM_ATTRIBUTE_USED,
303+
"Only meant for use in the debugger");
266304
};
267305

268306
} // namespace swift

0 commit comments

Comments
 (0)