Skip to content

Commit 3bf94ab

Browse files
authored
Merge pull request #17621 from ahoppen/002-sytnax-tree-based-coloring
[libSyntax] Syntax colouring based on the syntax tree
2 parents 4b9d611 + 6bc1b5a commit 3bf94ab

File tree

58 files changed

+2086
-365
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+2086
-365
lines changed

include/swift/AST/Identifier.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#ifndef SWIFT_AST_IDENTIFIER_H
1818
#define SWIFT_AST_IDENTIFIER_H
1919

20+
#include "swift/Basic/EditorPlaceholder.h"
2021
#include "swift/Basic/LLVM.h"
2122
#include "llvm/ADT/FoldingSet.h"
2223
#include "llvm/ADT/PointerUnion.h"
@@ -125,7 +126,7 @@ class Identifier {
125126
}
126127

127128
static bool isEditorPlaceholder(StringRef name) {
128-
return name.startswith("<#");
129+
return swift::isEditorPlaceholder(name);
129130
}
130131

131132
bool isEditorPlaceholder() const {

include/swift/Basic/EditorPlaceholder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ struct EditorPlaceholderData {
4646
Optional<EditorPlaceholderData>
4747
parseEditorPlaceholder(StringRef PlaceholderText);
4848

49+
/// Checks if an identifier with the given text is an editor placeholder
50+
bool isEditorPlaceholder(StringRef IdentifierText);
4951
} // end namespace swift
5052

5153
#endif // SWIFT_BASIC_EDITORPLACEHOLDER_H

include/swift/Syntax/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ set(generated_include_sources
88
SyntaxKind.h.gyb
99
SyntaxNodes.h.gyb
1010
SyntaxBuilders.h.gyb
11+
SyntaxClassifier.h.gyb
1112
SyntaxFactory.h.gyb
1213
SyntaxVisitor.h.gyb
1314
Trivia.h.gyb)

include/swift/Syntax/RawSyntax.h

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,13 @@ class RawSyntax final
221221
TriviaPiece> {
222222
friend TrailingObjects;
223223

224+
/// The ID that shall be used for the next node that is created and does not
225+
/// have a manually specified ID
226+
static unsigned NextFreeNodeId;
227+
228+
/// An ID of this node that is stable across incremental parses
229+
unsigned NodeId;
230+
224231
union {
225232
uint64_t OpaqueBits;
226233
struct {
@@ -272,13 +279,17 @@ class RawSyntax final
272279
}
273280

274281
/// Constructor for creating layout nodes
282+
/// If \p NodeId is \c None, the next free NodeId is used. If it is passed,
283+
/// the caller needs to ensure that the node ID has not been used yet.
275284
RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
276-
SourcePresence Presence, bool ManualMemory);
285+
SourcePresence Presence, bool ManualMemory,
286+
llvm::Optional<unsigned> NodeId);
277287
/// Constructor for creating token nodes
278-
RawSyntax(tok TokKind, OwnedString Text,
279-
ArrayRef<TriviaPiece> LeadingTrivia,
280-
ArrayRef<TriviaPiece> TrailingTrivia,
281-
SourcePresence Presence, bool ManualMemory);
288+
/// If \p NodeId is \c None, the next free NodeId is used. If it is passed,
289+
/// the caller needs to ensure that the NodeId has not been used yet.
290+
RawSyntax(tok TokKind, OwnedString Text, ArrayRef<TriviaPiece> LeadingTrivia,
291+
ArrayRef<TriviaPiece> TrailingTrivia, SourcePresence Presence,
292+
bool ManualMemory, llvm::Optional<unsigned> NodeId);
282293

283294
public:
284295
~RawSyntax();
@@ -300,14 +311,16 @@ class RawSyntax final
300311
/// Make a raw "layout" syntax node.
301312
static RC<RawSyntax> make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
302313
SourcePresence Presence,
303-
SyntaxArena *Arena = nullptr);
314+
SyntaxArena *Arena = nullptr,
315+
llvm::Optional<unsigned> NodeId = llvm::None);
304316

305317
/// Make a raw "token" syntax node.
306318
static RC<RawSyntax> make(tok TokKind, OwnedString Text,
307319
ArrayRef<TriviaPiece> LeadingTrivia,
308320
ArrayRef<TriviaPiece> TrailingTrivia,
309321
SourcePresence Presence,
310-
SyntaxArena *Arena = nullptr);
322+
SyntaxArena *Arena = nullptr,
323+
llvm::Optional<unsigned> NodeId = llvm::None);
311324

312325
/// Make a missing raw "layout" syntax node.
313326
static RC<RawSyntax> missing(SyntaxKind Kind, SyntaxArena *Arena = nullptr) {
@@ -335,6 +348,9 @@ class RawSyntax final
335348
return static_cast<SyntaxKind>(Bits.Common.Kind);
336349
}
337350

351+
/// Get an ID for this node that is stable across incremental parses
352+
unsigned getId() const { return NodeId; }
353+
338354
/// Returns true if the node is "missing" in the source (i.e. it was
339355
/// expected (or optional) but not written).
340356
bool isMissing() const { return getPresence() == SourcePresence::Missing; }

include/swift/Syntax/Serialization/SyntaxDeserialization.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,16 +156,30 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
156156
in.mapRequired("trailingTrivia", trailingTrivia);
157157
swift::SourcePresence presence;
158158
in.mapRequired("presence", presence);
159-
value = swift::RawSyntax::make(tokenKind, text, leadingTrivia,
160-
trailingTrivia, presence, nullptr);
159+
/// FIXME: This is a workaround for an existing bug in the LLVM YAML parser,
160+
/// which raises an error when deserializing a number with a trailing
161+
/// character like "1\n". See https://bugs.llvm.org/show_bug.cgi?id=15505
162+
StringRef nodeIdString;
163+
in.mapRequired("id", nodeIdString);
164+
unsigned nodeId = std::atoi(nodeIdString.data());
165+
value =
166+
swift::RawSyntax::make(tokenKind, text, leadingTrivia, trailingTrivia,
167+
presence, /*Arena=*/nullptr, nodeId);
161168
} else {
162169
swift::SyntaxKind kind;
163170
in.mapRequired("kind", kind);
164171
std::vector<swift::RC<swift::RawSyntax>> layout;
165172
in.mapRequired("layout", layout);
166173
swift::SourcePresence presence;
167174
in.mapRequired("presence", presence);
168-
value = swift::RawSyntax::make(kind, layout, presence, nullptr);
175+
/// FIXME: This is a workaround for an existing bug in the LLVM YAML parser,
176+
/// which raises an error when deserializing a number with a trailing
177+
/// character like "1\n". See https://bugs.llvm.org/show_bug.cgi?id=15505
178+
StringRef nodeIdString;
179+
in.mapRequired("id", nodeIdString);
180+
unsigned nodeId = std::atoi(nodeIdString.data());
181+
value = swift::RawSyntax::make(kind, layout, presence, /*Arena=*/nullptr,
182+
nodeId);
169183
}
170184
}
171185
};

include/swift/Syntax/Serialization/SyntaxSerialization.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ struct ObjectTraits<syntax::RawSyntax> {
141141
}
142142
auto presence = value.getPresence();
143143
out.mapRequired("presence", presence);
144+
auto nodeId = value.getId();
145+
out.mapRequired("id", nodeId);
144146
}
145147
};
146148

include/swift/Syntax/Syntax.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ class Syntax {
8484
/// Get the shared raw syntax.
8585
RC<RawSyntax> getRaw() const;
8686

87+
/// Get an ID for this node that is stable across incremental parses
88+
unsigned getId() const { return getRaw()->getId(); }
89+
8790
/// Get the number of child nodes in this piece of syntax, not including
8891
/// tokens.
8992
size_t getNumChildren() const;
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
%{
2+
# -*- mode: C++ -*-
3+
from gyb_syntax_support import *
4+
NODE_MAP = create_node_map()
5+
# Ignore the following admonition; it applies to the resulting .h file only
6+
}%
7+
//// Automatically Generated From SyntaxClassifier.h.gyb.
8+
//// Do Not Edit Directly!
9+
//===----------- SyntaxClassifier.h - SyntaxClassifier definitions --------===//
10+
//
11+
// This source file is part of the Swift.org open source project
12+
//
13+
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
14+
// Licensed under Apache License v2.0 with Runtime Library Exception
15+
//
16+
// See https://swift.org/LICENSE.txt for license information
17+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
18+
//
19+
//===----------------------------------------------------------------------===//
20+
21+
#ifndef SWIFT_SYNTAX_CLASSIFIER_H
22+
#define SWIFT_SYNTAX_CLASSIFIER_H
23+
24+
#include "swift/Syntax/SyntaxVisitor.h"
25+
#include <stack>
26+
27+
namespace swift {
28+
namespace syntax {
29+
30+
31+
/// A classification that determines which color a token should be colored in
32+
/// for syntax coloring.
33+
enum class SyntaxClassification {
34+
None,
35+
Keyword,
36+
Identifier,
37+
DollarIdentifier,
38+
IntegerLiteral,
39+
FloatingLiteral,
40+
StringLiteral,
41+
/// Marks the parens for a string interpolation.
42+
StringInterpolationAnchor,
43+
TypeIdentifier,
44+
/// #if/#else/#endif occurrence.
45+
BuildConfigKeyword,
46+
/// An identifier in a #if condition.
47+
BuildConfigId,
48+
/// #-keywords like #warning, #sourceLocation
49+
PoundDirectiveKeyword,
50+
/// Any occurrence of '@<attribute-name>' anywhere.
51+
Attribute,
52+
/// An editor placeholder string <#like this#>.
53+
EditorPlaceholder,
54+
ObjectLiteral
55+
};
56+
57+
58+
class SyntaxClassifier: public SyntaxVisitor {
59+
struct ContextStackEntry {
60+
/// The classification all identifiers shall inherit
61+
SyntaxClassification Classification;
62+
/// If set to \c true, all tokens will be forced to receive the above
63+
/// classification, overriding their context-free classification
64+
bool ForceClassification;
65+
66+
ContextStackEntry(SyntaxClassification Classification,
67+
bool ForceClassification)
68+
: Classification(Classification),
69+
ForceClassification(ForceClassification) {}
70+
};
71+
72+
std::map<unsigned, SyntaxClassification> ClassifiedTokens;
73+
/// The top classification of this stack determines the color of identifiers
74+
std::stack<ContextStackEntry, llvm::SmallVector<ContextStackEntry, 16>> ContextStack;
75+
76+
template<typename T>
77+
void visit(T Node, SyntaxClassification Classification,
78+
bool ForceClassification) {
79+
ContextStack.emplace(Classification, ForceClassification);
80+
visit(Node);
81+
ContextStack.pop();
82+
}
83+
84+
template<typename T>
85+
void visit(llvm::Optional<T> OptNode) {
86+
if (OptNode.hasValue()) {
87+
static_cast<SyntaxVisitor *>(this)->visit(OptNode.getValue());
88+
}
89+
}
90+
91+
virtual void visit(TokenSyntax TokenNode) override;
92+
93+
virtual void visit(Syntax Node) override {
94+
SyntaxVisitor::visit(Node);
95+
}
96+
97+
% for node in SYNTAX_NODES:
98+
% if is_visitable(node):
99+
virtual void visit(${node.name} Node) override;
100+
% end
101+
% end
102+
103+
public:
104+
std::map<unsigned, SyntaxClassification> classify(Syntax Node) {
105+
// Clean up the environment
106+
ContextStack = std::stack<ContextStackEntry, llvm::SmallVector<ContextStackEntry, 16>>();
107+
ContextStack.push({SyntaxClassification::None, false});
108+
ClassifiedTokens.clear();
109+
110+
Node.accept(*this);
111+
112+
return ClassifiedTokens;
113+
}
114+
};
115+
} // namespace syntax
116+
} // namespace swift
117+
118+
#endif // SWIFT_SYNTAX_CLASSIFIER_H

include/swift/Syntax/SyntaxVisitor.h.gyb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct SyntaxVisitor {
4242

4343
virtual void visitPre(Syntax node) {}
4444
virtual void visitPost(Syntax node) {}
45-
void visit(Syntax node);
45+
virtual void visit(Syntax node);
4646

4747
void visitChildren(Syntax node) {
4848
for (unsigned i = 0, e = node.getNumChildren(); i != e; ++i) {

lib/Basic/EditorPlaceholder.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,7 @@ swift::parseEditorPlaceholder(StringRef PlaceholderText) {
7878

7979
return PHDataTyped;
8080
}
81+
82+
bool swift::isEditorPlaceholder(StringRef IdentifierText) {
83+
return IdentifierText.startswith("<#");
84+
}

lib/Syntax/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,6 @@ add_swift_library(swiftSyntax STATIC
1414
RawSyntax.cpp
1515
Syntax.cpp
1616
SyntaxArena.cpp
17+
SyntaxClassifier.cpp.gyb
1718
SyntaxData.cpp
1819
UnknownSyntax.cpp)

lib/Syntax/RawSyntax.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,19 @@ static void dumpTokenKind(llvm::raw_ostream &OS, tok Kind) {
6767

6868
} // end of anonymous namespace
6969

70+
unsigned RawSyntax::NextFreeNodeId = 1;
71+
7072
RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
71-
SourcePresence Presence, bool ManualMemory) {
73+
SourcePresence Presence, bool ManualMemory,
74+
llvm::Optional<unsigned> NodeId) {
7275
assert(Kind != SyntaxKind::Token &&
7376
"'token' syntax node must be constructed with dedicated constructor");
77+
if (NodeId.hasValue()) {
78+
this->NodeId = NodeId.getValue();
79+
NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
80+
} else {
81+
this->NodeId = NextFreeNodeId++;
82+
}
7483
Bits.Common.Kind = unsigned(Kind);
7584
Bits.Common.Presence = unsigned(Presence);
7685
Bits.Common.ManualMemory = unsigned(ManualMemory);
@@ -92,7 +101,14 @@ RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
92101
RawSyntax::RawSyntax(tok TokKind, OwnedString Text,
93102
ArrayRef<TriviaPiece> LeadingTrivia,
94103
ArrayRef<TriviaPiece> TrailingTrivia,
95-
SourcePresence Presence, bool ManualMemory) {
104+
SourcePresence Presence, bool ManualMemory,
105+
llvm::Optional<unsigned> NodeId) {
106+
if (NodeId.hasValue()) {
107+
this->NodeId = NodeId.getValue();
108+
NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
109+
} else {
110+
this->NodeId = NextFreeNodeId++;
111+
}
96112
Bits.Common.Kind = unsigned(SyntaxKind::Token);
97113
Bits.Common.Presence = unsigned(Presence);
98114
Bits.Common.ManualMemory = unsigned(ManualMemory);
@@ -126,25 +142,28 @@ RawSyntax::~RawSyntax() {
126142
}
127143

128144
RC<RawSyntax> RawSyntax::make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
129-
SourcePresence Presence, SyntaxArena *Arena) {
145+
SourcePresence Presence, SyntaxArena *Arena,
146+
llvm::Optional<unsigned> NodeId) {
130147
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString, TriviaPiece>(
131148
Layout.size(), 0, 0);
132149
void *data = Arena ? Arena->AllocateRawSyntax(size, alignof(RawSyntax))
133150
: ::operator new(size);
134-
return RC<RawSyntax>(new (data)
135-
RawSyntax(Kind, Layout, Presence, bool(Arena)));
151+
return RC<RawSyntax>(
152+
new (data) RawSyntax(Kind, Layout, Presence, bool(Arena), NodeId));
136153
}
137154

138155
RC<RawSyntax> RawSyntax::make(tok TokKind, OwnedString Text,
139156
ArrayRef<TriviaPiece> LeadingTrivia,
140157
ArrayRef<TriviaPiece> TrailingTrivia,
141-
SourcePresence Presence, SyntaxArena *Arena) {
158+
SourcePresence Presence, SyntaxArena *Arena,
159+
llvm::Optional<unsigned> NodeId) {
142160
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString, TriviaPiece>(
143161
0, 1, LeadingTrivia.size() + TrailingTrivia.size());
144162
void *data = Arena ? Arena->AllocateRawSyntax(size, alignof(RawSyntax))
145163
: ::operator new(size);
146-
return RC<RawSyntax>(new (data) RawSyntax(
147-
TokKind, Text, LeadingTrivia, TrailingTrivia, Presence, bool(Arena)));
164+
return RC<RawSyntax>(new (data) RawSyntax(TokKind, Text, LeadingTrivia,
165+
TrailingTrivia, Presence,
166+
bool(Arena), NodeId));
148167
}
149168

150169
RC<RawSyntax> RawSyntax::append(RC<RawSyntax> NewLayoutElement) const {

0 commit comments

Comments
 (0)