Skip to content

Commit 8430eff

Browse files
committed
[libSyntax] Add syntax coloring based on the syntax tree
1 parent 03a7042 commit 8430eff

File tree

12 files changed

+453
-23
lines changed

12 files changed

+453
-23
lines changed

include/swift/Syntax/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ set(generated_include_sources
88
SyntaxKind.h.gyb
99
SyntaxNodes.h.gyb
1010
SyntaxBuilders.h.gyb
11+
SyntaxClassifier.h.gyb
1112
SyntaxFactory.h.gyb
1213
SyntaxVisitor.h.gyb
1314
Trivia.h.gyb)
include/swift/Syntax/SyntaxClassifier.h.gyb

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
%{
2+
# -*- mode: C++ -*-
3+
from gyb_syntax_support import *
4+
NODE_MAP = create_node_map()
5+
# Ignore the following admonition; it applies to the resulting .h file only
6+
}%
7+
//// Automatically Generated From SyntaxClassifier.h.gyb.
8+
//// Do Not Edit Directly!
9+
//===----------- SyntaxClassifier.h - SyntaxClassifier definitions --------===//
10+
//
11+
// This source file is part of the Swift.org open source project
12+
//
13+
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
14+
// Licensed under Apache License v2.0 with Runtime Library Exception
15+
//
16+
// See https://swift.org/LICENSE.txt for license information
17+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
18+
//
19+
//===----------------------------------------------------------------------===//
20+
21+
#ifndef SWIFT_SYNTAX_CLASSIFIER_H
22+
#define SWIFT_SYNTAX_CLASSIFIER_H
23+
24+
#include "swift/Syntax/SyntaxVisitor.h"
25+
#include <stack>
26+
27+
namespace swift {
28+
namespace syntax {
29+
30+
31+
/// A classification that determines which color a token should be colored in
32+
/// for syntax coloring.
33+
enum class SyntaxClassification {
34+
/// No dedicated classification. This is the context-free result for
/// punctuators, operators and end-of-file tokens.
None,
35+
/// A keyword token, including contextual keywords.
Keyword,
36+
/// An identifier token that received no other classification from its
/// context.
Identifier,
37+
/// A dollar identifier token.
DollarIdentifier,
38+
/// An integer literal token.
IntegerLiteral,
39+
/// A floating point literal token.
FloatingLiteral,
40+
/// A string literal token, including quotes, string segments and
/// unterminated string literals.
StringLiteral,
41+
/// Marks the parens for a string interpolation.
StringInterpolationAnchor,
43+
/// NOTE(review): never produced by the context-free token classification;
/// presumably assigned through the per-child classification of the syntax
/// node definitions for identifiers that name a type -- confirm.
TypeIdentifier,
44+
/// #if/#else/#endif occurrence.
BuildConfigKeyword,
46+
/// An identifier in a #if condition.
BuildConfigId,
48+
/// #-keywords like #warning, #sourceLocation
PoundDirectiveKeyword,
50+
/// Any occurrence of '@<attribute-name>' anywhere.
Attribute,
52+
/// An editor placeholder string <#like this#>.
EditorPlaceholder,
54+
/// A '#'-prefixed object literal keyword token.
ObjectLiteral
55+
};
56+
57+
58+
/// A syntax visitor that walks a syntax tree and records a
/// \c SyntaxClassification for every token it visits, keyed by the token's ID.
/// Use \c classify as the entry point.
class SyntaxClassifier: public SyntaxVisitor {
59+
/// One level of classification context. An entry is pushed while a child
/// that carries a classification is being visited.
struct ContextStackEntry {
60+
/// The classification all identifiers shall inherit
61+
SyntaxClassification Classification;
62+
/// If set to \c true, all tokens will be forced to receive the above
63+
/// classification, overriding their context-free classification
64+
bool ForceClassification;
65+
66+
ContextStackEntry(SyntaxClassification Classification,
67+
bool ForceClassification)
68+
: Classification(Classification),
69+
ForceClassification(ForceClassification) {}
70+
};
71+
72+
/// The classifications recorded so far, keyed by the token's ID.
std::map<unsigned, SyntaxClassification> ClassifiedTokens;
73+
/// The top classification of this stack determines the color of identifiers
74+
std::stack<ContextStackEntry, llvm::SmallVector<ContextStackEntry, 16>> ContextStack;
75+
76+
/// Visits \p Node with a new context entry made of \p Classification and
/// \p ForceClassification on top of the stack. The entry is popped again
/// once the visit returns.
template<typename T>
77+
void visit(T Node, SyntaxClassification Classification,
78+
bool ForceClassification) {
79+
ContextStack.emplace(Classification, ForceClassification);
80+
visit(Node);
81+
ContextStack.pop();
82+
}
83+
84+
/// Visits the node wrapped in \p OptNode if there is one; an empty optional
/// is ignored.
template<typename T>
85+
void visit(llvm::Optional<T> OptNode) {
86+
if (OptNode.hasValue()) {
87+
// The call goes through the SyntaxVisitor interface.
static_cast<SyntaxVisitor *>(this)->visit(OptNode.getValue());
88+
}
89+
}
90+
91+
/// Records the classification of a single token.
virtual void visit(TokenSyntax TokenNode) override;
92+
93+
/// Forwards generic nodes to the base class implementation.
virtual void visit(Syntax Node) override {
94+
SyntaxVisitor::visit(Node);
95+
}
96+
97+
// The loop below declares one visit override per visitable syntax node.
% for node in SYNTAX_NODES:
98+
% if is_visitable(node):
99+
virtual void visit(${node.name} Node) override;
100+
% end
101+
% end
102+
103+
public:
104+
/// Classifies the tokens of the tree rooted at \p Node.
///
/// State left over from a previous call is discarded first, and the context
/// stack starts with a non-forcing \c SyntaxClassification::None entry.
///
/// \returns a copy of the table mapping token IDs to their classification.
std::map<unsigned, SyntaxClassification> classify(Syntax Node) {
105+
// Clean up the environment
106+
ContextStack = std::stack<ContextStackEntry, llvm::SmallVector<ContextStackEntry, 16>>();
107+
ContextStack.push({SyntaxClassification::None, false});
108+
ClassifiedTokens.clear();
109+
110+
Node.accept(*this);
111+
112+
return ClassifiedTokens;
113+
}
114+
};
115+
} // namespace syntax
116+
} // namespace swift
117+
118+
#endif // SWIFT_SYNTAX_CLASSIFIER_H

include/swift/Syntax/SyntaxVisitor.h.gyb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct SyntaxVisitor {
4242

4343
virtual void visitPre(Syntax node) {}
4444
virtual void visitPost(Syntax node) {}
45-
void visit(Syntax node);
45+
virtual void visit(Syntax node);
4646

4747
void visitChildren(Syntax node) {
4848
for (unsigned i = 0, e = node.getNumChildren(); i != e; ++i) {

lib/Syntax/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,6 @@ add_swift_library(swiftSyntax STATIC
1414
RawSyntax.cpp
1515
Syntax.cpp
1616
SyntaxArena.cpp
17+
SyntaxClassifier.cpp.gyb
1718
SyntaxData.cpp
1819
UnknownSyntax.cpp)

lib/Syntax/SyntaxClassifier.cpp.gyb

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
%{
2+
from gyb_syntax_support import *
3+
# -*- mode: C++ -*-
4+
# Ignore the following admonition; it applies to the resulting .cpp file only
5+
}%
6+
//// Automatically Generated From SyntaxClassifier.cpp.gyb.
7+
//// Do Not Edit Directly!
8+
//===----- SyntaxClassifier.cpp - Syntax Classifier implementations -------===//
9+
//
10+
// This source file is part of the Swift.org open source project
11+
//
12+
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
13+
// Licensed under Apache License v2.0 with Runtime Library Exception
14+
//
15+
// See https://swift.org/LICENSE.txt for license information
16+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
17+
//
18+
//===----------------------------------------------------------------------===//
19+
//
20+
// This file defines the Syntax Classifier, which walks the syntax tree and
21+
// creates a classification table for all tokens in the syntax tree, mapping
22+
// each token's ID to a \c SyntaxClassification.
23+
//
24+
//===----------------------------------------------------------------------===//
25+
26+
#include "swift/Basic/EditorPlaceholder.h"
27+
#include "swift/Syntax/SyntaxClassifier.h"
28+
29+
using namespace swift;
30+
using namespace swift::syntax;
31+
32+
// The loop below generates one SyntaxClassifier::visit definition per
// visitable syntax node. Unknown nodes and syntax collections defer to
// SyntaxVisitor::visit. Every other node visits its children one by one; a
// child whose definition declares a classification is visited with that
// classification (and its force flag) pushed as context.
% for node in SYNTAX_NODES:
33+
% if is_visitable(node):
34+
void SyntaxClassifier::visit(${node.name} Node) {
35+
% if node.is_unknown() or node.is_syntax_collection():
36+
SyntaxVisitor::visit(Node);
37+
% else:
38+
% for child in node.children:
39+
% if child.classification:
40+
visit(Node.get${child.name}(), SyntaxClassification::${child.classification}, ${"true" if child.force_classification else "false"});
41+
% else:
42+
visit(Node.get${child.name}());
43+
% end
44+
% end
45+
% end
46+
}
47+
% end
48+
% end
49+
50+
/// Returns the SyntaxClassification a token node should receive if it is not
/// inside a special context.
///
/// \param TokenNode the token to classify.
/// \returns the context-free classification of the token, or \c llvm::None if
/// the token has no context-free classification and should always inherit
/// its classification from the context. Note that
/// \c SyntaxClassification::None is a regular result (used for punctuators,
/// operators and the like) and is distinct from \c llvm::None.
llvm::Optional<SyntaxClassification>
getContextFreeClassificationForToken(TokenSyntax TokenNode) {
  switch (TokenNode.getTokenKind()) {
  // Keyword-like tokens are expanded from the token definition file.
#define KEYWORD(KW)                                                            \
  case tok::kw_##KW:                                                           \
    return SyntaxClassification::Keyword;
#define POUND_KEYWORD(KW)                                                      \
  case tok::pound_##KW:                                                        \
    return SyntaxClassification::Keyword;
#define POUND_OBJECT_LITERAL(KW, desc, proto)                                  \
  case tok::pound_##KW:                                                        \
    return SyntaxClassification::ObjectLiteral;
#define POUND_DIRECTIVE_KEYWORD(KW)                                            \
  case tok::pound_##KW:                                                        \
    return SyntaxClassification::PoundDirectiveKeyword;
#define POUND_COND_DIRECTIVE_KEYWORD(KW)                                       \
  case tok::pound_##KW:                                                        \
    return SyntaxClassification::BuildConfigKeyword;
#include "swift/Syntax/TokenKinds.def"

  // Punctuators
  case tok::l_paren:
  case tok::r_paren:
  case tok::l_brace:
  case tok::r_brace:
  case tok::l_square:
  case tok::r_square:
  case tok::l_angle:
  case tok::r_angle:
  case tok::period:
  case tok::period_prefix:
  case tok::comma:
  case tok::colon:
  case tok::semi:
  case tok::equal:
  case tok::pound:
  case tok::amp_prefix:
  case tok::arrow:
  case tok::backtick:
  case tok::backslash:
  case tok::exclaim_postfix:
  case tok::question_postfix:
  case tok::question_infix:
  case tok::sil_dollar:
  case tok::sil_exclamation:
    return SyntaxClassification::None;
  case tok::string_quote:
  case tok::multiline_string_quote:
    return SyntaxClassification::StringLiteral;
  case tok::at_sign:
    return SyntaxClassification::Attribute;

  // Literals
  case tok::integer_literal:
    return SyntaxClassification::IntegerLiteral;
  case tok::floating_literal:
    return SyntaxClassification::FloatingLiteral;
  case tok::string_literal:
    return SyntaxClassification::StringLiteral;

  // Miscellaneous
  case tok::identifier:
    // Plain identifiers have no classification of their own; only editor
    // placeholders are recognized without looking at the context.
    if (isEditorPlaceholder(TokenNode.getText()))
      return SyntaxClassification::EditorPlaceholder;
    return llvm::None;
  case tok::unknown:
    // Unterminated string literal
    if (TokenNode.getText().startswith("\""))
      return SyntaxClassification::StringLiteral;
    return SyntaxClassification::None;
  case tok::eof:
  case tok::code_complete:
  case tok::oper_binary_unspaced:
  case tok::oper_binary_spaced:
  case tok::oper_postfix:
  case tok::oper_prefix:
    return SyntaxClassification::None;
  case tok::dollarident:
    return SyntaxClassification::DollarIdentifier;
  case tok::sil_local_name:
    return SyntaxClassification::None;
  case tok::comment:
    llvm_unreachable("Comments should be in trivia");
  case tok::contextual_keyword:
    return SyntaxClassification::Keyword;
  case tok::string_segment:
    return SyntaxClassification::StringLiteral;
  case tok::string_interpolation_anchor:
    return SyntaxClassification::StringInterpolationAnchor;
  case tok::NUM_TOKENS:
    llvm_unreachable("");
  }

  // Every case above returns or is unreachable. This keeps control from
  // falling off the end of a non-void function if the token kind holds a
  // value outside the enumeration.
  llvm_unreachable("Unhandled token kind");
}
141+
142+
/// Determines the classification of \p TokenNode and stores it in
/// \c ClassifiedTokens under the token's ID.
///
/// The classification of the innermost context is the starting point. Unless
/// that context forces its classification, a context-free classification of
/// the token takes precedence, and an identifier that would otherwise end up
/// unclassified is marked as \c Identifier.
void SyntaxClassifier::visit(TokenSyntax TokenNode) {
  const auto &Context = ContextStack.top();
  SyntaxClassification Classification = Context.Classification;

  if (!Context.ForceClassification) {
    // Prefer the token's own classification if it has one.
    if (auto ContextFree = getContextFreeClassificationForToken(TokenNode))
      Classification = *ContextFree;

    // Identifiers never stay unclassified.
    if (Classification == SyntaxClassification::None &&
        TokenNode.getTokenKind() == tok::identifier)
      Classification = SyntaxClassification::Identifier;
  }

  assert(ClassifiedTokens.count(TokenNode.getId()) == 0 &&
         "Token already classified");
  ClassifiedTokens[TokenNode.getId()] = Classification;
}

0 commit comments

Comments
 (0)