Skip to content

[libSyntax] Syntax colouring based on the syntax tree #17621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/swift/AST/Identifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#ifndef SWIFT_AST_IDENTIFIER_H
#define SWIFT_AST_IDENTIFIER_H

#include "swift/Basic/EditorPlaceholder.h"
#include "swift/Basic/LLVM.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/PointerUnion.h"
Expand Down Expand Up @@ -125,7 +126,7 @@ class Identifier {
}

/// Returns true if \p name is spelled like an editor placeholder.
/// NOTE(review): this hunk lists two returns back to back; per the change
/// summary for this file (2 additions & 1 deletion) the first is presumably
/// the removed line and the second, which forwards to
/// swift::isEditorPlaceholder, its replacement — confirm against the header.
static bool isEditorPlaceholder(StringRef name) {
return name.startswith("<#");
return swift::isEditorPlaceholder(name);
}

bool isEditorPlaceholder() const {
Expand Down
2 changes: 2 additions & 0 deletions include/swift/Basic/EditorPlaceholder.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ struct EditorPlaceholderData {
Optional<EditorPlaceholderData>
parseEditorPlaceholder(StringRef PlaceholderText);

/// Checks whether an identifier with the given text is an editor placeholder,
/// i.e. whether the text starts with the "<#" marker.
bool isEditorPlaceholder(StringRef IdentifierText);
} // end namespace swift

#endif // SWIFT_BASIC_EDITORPLACEHOLDER_H
1 change: 1 addition & 0 deletions include/swift/Syntax/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ set(generated_include_sources
SyntaxKind.h.gyb
SyntaxNodes.h.gyb
SyntaxBuilders.h.gyb
SyntaxClassifier.h.gyb
SyntaxFactory.h.gyb
SyntaxVisitor.h.gyb
Trivia.h.gyb)
Expand Down
30 changes: 23 additions & 7 deletions include/swift/Syntax/RawSyntax.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,13 @@ class RawSyntax final
TriviaPiece> {
friend TrailingObjects;

/// The ID that shall be used for the next node that is created and does not
/// have a manually specified id
static unsigned NextFreeNodeId;

/// An ID of this node that is stable across incremental parses
unsigned NodeId;

union {
uint64_t OpaqueBits;
struct {
Expand Down Expand Up @@ -272,13 +279,17 @@ class RawSyntax final
}

/// Constructor for creating layout nodes
/// If \p NodeId is \c None, the next free NodeId is used. If it is passed,
/// the caller needs to ensure that the node ID has not been used yet.
RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
SourcePresence Presence, bool ManualMemory);
SourcePresence Presence, bool ManualMemory,
llvm::Optional<unsigned> NodeId);
/// Constructor for creating token nodes
RawSyntax(tok TokKind, OwnedString Text,
ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia,
SourcePresence Presence, bool ManualMemory);
/// If \p NodeId is \c None, the next free NodeId is used. If it is passed,
/// the caller needs to ensure that the NodeId has not been used yet.
RawSyntax(tok TokKind, OwnedString Text, ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia, SourcePresence Presence,
bool ManualMemory, llvm::Optional<unsigned> NodeId);

public:
~RawSyntax();
Expand All @@ -300,14 +311,16 @@ class RawSyntax final
/// Make a raw "layout" syntax node.
static RC<RawSyntax> make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
SourcePresence Presence,
SyntaxArena *Arena = nullptr);
SyntaxArena *Arena = nullptr,
llvm::Optional<unsigned> NodeId = llvm::None);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If NodeId should always be greater than 0, I think this can be unsigned NodeId = 0, just like the token version. Am I missing something?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially used 0 to indicate that the NodeId should be picked automatically, but changed it to Optional<unsigned> because of @nkcsgexi's comment here: #16636 (comment). I'm fine with either.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think using 0 as the default is too unreadable. @nkcsgexi WDYT?
Either way, if there's no specific reason, please be consistent between the token and layout versions.


/// Make a raw "token" syntax node.
static RC<RawSyntax> make(tok TokKind, OwnedString Text,
ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia,
SourcePresence Presence,
SyntaxArena *Arena = nullptr);
SyntaxArena *Arena = nullptr,
llvm::Optional<unsigned> NodeId = llvm::None);

/// Make a missing raw "layout" syntax node.
static RC<RawSyntax> missing(SyntaxKind Kind, SyntaxArena *Arena = nullptr) {
Expand Down Expand Up @@ -335,6 +348,9 @@ class RawSyntax final
return static_cast<SyntaxKind>(Bits.Common.Kind);
}

/// Returns the ID stored for this node. The ID is meant to stay stable
/// across incremental parses.
unsigned getId() const {
  return NodeId;
}

/// Whether this node is "missing" from the source, i.e. it was expected
/// (or optional) but not actually written.
bool isMissing() const {
  const bool IsAbsent = getPresence() == SourcePresence::Missing;
  return IsAbsent;
}
Expand Down
20 changes: 17 additions & 3 deletions include/swift/Syntax/Serialization/SyntaxDeserialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,30 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
in.mapRequired("trailingTrivia", trailingTrivia);
swift::SourcePresence presence;
in.mapRequired("presence", presence);
value = swift::RawSyntax::make(tokenKind, text, leadingTrivia,
trailingTrivia, presence, nullptr);
/// FIXME: This is a workaround for an existing bug in the LLVM YAML parser,
/// which raises an error when deserializing a number with a trailing
/// character such as "1\n". See https://bugs.llvm.org/show_bug.cgi?id=15505
StringRef nodeIdString;
in.mapRequired("id", nodeIdString);
unsigned nodeId = std::atoi(nodeIdString.data());
value =
swift::RawSyntax::make(tokenKind, text, leadingTrivia, trailingTrivia,
presence, /*Arena=*/nullptr, nodeId);
} else {
swift::SyntaxKind kind;
in.mapRequired("kind", kind);
std::vector<swift::RC<swift::RawSyntax>> layout;
in.mapRequired("layout", layout);
swift::SourcePresence presence;
in.mapRequired("presence", presence);
value = swift::RawSyntax::make(kind, layout, presence, nullptr);
/// FIXME: This is a workaround for an existing bug in the LLVM YAML parser,
/// which raises an error when deserializing a number with a trailing
/// character such as "1\n". See https://bugs.llvm.org/show_bug.cgi?id=15505
StringRef nodeIdString;
in.mapRequired("id", nodeIdString);
unsigned nodeId = std::atoi(nodeIdString.data());
value = swift::RawSyntax::make(kind, layout, presence, /*Arena=*/nullptr,
nodeId);
}
}
};
Expand Down
2 changes: 2 additions & 0 deletions include/swift/Syntax/Serialization/SyntaxSerialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ struct ObjectTraits<syntax::RawSyntax> {
}
auto presence = value.getPresence();
out.mapRequired("presence", presence);
auto nodeId = value.getId();
out.mapRequired("id", nodeId);
}
};

Expand Down
3 changes: 3 additions & 0 deletions include/swift/Syntax/Syntax.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ class Syntax {
/// Get the shared raw syntax.
RC<RawSyntax> getRaw() const;

/// Returns the ID of the underlying raw syntax node. The ID is meant to stay
/// stable across incremental parses.
unsigned getId() const {
  const auto Raw = getRaw();
  return Raw->getId();
}

/// Get the number of child nodes in this piece of syntax, not including
/// tokens.
size_t getNumChildren() const;
Expand Down
118 changes: 118 additions & 0 deletions include/swift/Syntax/SyntaxClassifier.h.gyb
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
%{
# -*- mode: C++ -*-
from gyb_syntax_support import *
NODE_MAP = create_node_map()
# Ignore the following admonition; it applies to the resulting .h file only
}%
//// Automatically Generated From SyntaxClassifier.h.gyb.
//// Do Not Edit Directly!
//===----------- SyntaxClassifier.h - SyntaxClassifier definitions --------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#ifndef SWIFT_SYNTAX_CLASSIFIER_H
#define SWIFT_SYNTAX_CLASSIFIER_H

#include "swift/Syntax/SyntaxVisitor.h"
#include <stack>

namespace swift {
namespace syntax {


/// A classification that determines which color a token should be colored in
/// for syntax coloring.
///
/// SyntaxClassifier::classify returns a map whose values are of this type.
enum class SyntaxClassification {
/// The classification of the initial context entry that
/// SyntaxClassifier::classify pushes before walking a tree.
/// NOTE(review): presumably also means "no special coloring" — confirm.
None,
Keyword,
Identifier,
/// NOTE(review): presumably identifiers that start with '$' — confirm.
DollarIdentifier,
IntegerLiteral,
FloatingLiteral,
StringLiteral,
/// Marks the parens for a string interpolation.
StringInterpolationAnchor,
TypeIdentifier,
/// #if/#else/#endif occurrence.
BuildConfigKeyword,
/// An identifier in a #if condition.
BuildConfigId,
/// #-keywords like #warning, #sourceLocation
PoundDirectiveKeyword,
/// Any occurrence of '@<attribute-name>' anywhere.
Attribute,
/// An editor placeholder string <#like this#>.
EditorPlaceholder,
/// NOTE(review): presumably object literal expressions — confirm.
ObjectLiteral
};


/// A SyntaxVisitor that walks a syntax tree and produces a map from unsigned
/// keys to SyntaxClassification values; classify() is the only public entry
/// point.
/// NOTE(review): the keys are presumably token node IDs (Syntax::getId); the
/// code that fills the map is not defined in this header — confirm.
class SyntaxClassifier: public SyntaxVisitor {
/// One entry of the context stack that is maintained while visiting.
struct ContextStackEntry {
/// The classification all identifiers shall inherit
SyntaxClassification Classification;
/// If set to \c true, all tokens will be forced to receive the above
/// classification, overriding their context-free classification
bool ForceClassification;

ContextStackEntry(SyntaxClassification Classification,
bool ForceClassification)
: Classification(Classification),
ForceClassification(ForceClassification) {}
};

/// The result of the current classify() run. It is cleared at the start of
/// every run and returned by value at the end.
std::map<unsigned, SyntaxClassification> ClassifiedTokens;
/// The top classification of this stack determines the color of identifiers
std::stack<ContextStackEntry, llvm::SmallVector<ContextStackEntry, 16>> ContextStack;

/// Visits \p Node with a new context entry built from \p Classification and
/// \p ForceClassification on top of the stack; the entry is popped again once
/// the visit returns.
template<typename T>
void visit(T Node, SyntaxClassification Classification,
bool ForceClassification) {
ContextStack.emplace(Classification, ForceClassification);
visit(Node);
ContextStack.pop();
}

/// Visits the node wrapped in \p OptNode if there is one; does nothing for an
/// empty Optional.
template<typename T>
void visit(llvm::Optional<T> OptNode) {
if (OptNode.hasValue()) {
// NOTE(review): the call goes through a SyntaxVisitor pointer, presumably so
// that overload resolution uses the base class's visit overloads — confirm.
static_cast<SyntaxVisitor *>(this)->visit(OptNode.getValue());
}
}

/// Declared here only; the definition is not part of this header.
virtual void visit(TokenSyntax TokenNode) override;

/// Forwards to the SyntaxVisitor implementation.
virtual void visit(Syntax Node) override {
SyntaxVisitor::visit(Node);
}

// The template loop below declares one visit override for every node in
// SYNTAX_NODES for which is_visitable(node) holds.
% for node in SYNTAX_NODES:
% if is_visitable(node):
virtual void visit(${node.name} Node) override;
% end
% end

public:
/// Classifies the tree rooted at \p Node and returns the resulting map.
/// State left over from a previous run is discarded first, so the same
/// classifier object can be used for several calls.
std::map<unsigned, SyntaxClassification> classify(Syntax Node) {
// Clean up the environment
ContextStack = std::stack<ContextStackEntry, llvm::SmallVector<ContextStackEntry, 16>>();
// Bottom-of-stack entry: classification None, not forced.
ContextStack.push({SyntaxClassification::None, false});
ClassifiedTokens.clear();

Node.accept(*this);

// Returned by value, so the caller gets a copy of the member map.
return ClassifiedTokens;
}
};
} // namespace syntax
} // namespace swift

#endif // SWIFT_SYNTAX_CLASSIFIER_H
2 changes: 1 addition & 1 deletion include/swift/Syntax/SyntaxVisitor.h.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct SyntaxVisitor {

virtual void visitPre(Syntax node) {}
virtual void visitPost(Syntax node) {}
void visit(Syntax node);
virtual void visit(Syntax node);

void visitChildren(Syntax node) {
for (unsigned i = 0, e = node.getNumChildren(); i != e; ++i) {
Expand Down
4 changes: 4 additions & 0 deletions lib/Basic/EditorPlaceholder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,7 @@ swift::parseEditorPlaceholder(StringRef PlaceholderText) {

return PHDataTyped;
}

/// Editor placeholders are spelled <#like this#>; only the leading "<#"
/// marker of \p IdentifierText is checked here.
bool swift::isEditorPlaceholder(StringRef IdentifierText) {
  const bool HasPlaceholderPrefix = IdentifierText.startswith("<#");
  return HasPlaceholderPrefix;
}
1 change: 1 addition & 0 deletions lib/Syntax/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ add_swift_library(swiftSyntax STATIC
RawSyntax.cpp
Syntax.cpp
SyntaxArena.cpp
SyntaxClassifier.cpp.gyb
SyntaxData.cpp
UnknownSyntax.cpp)
35 changes: 27 additions & 8 deletions lib/Syntax/RawSyntax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,19 @@ static void dumpTokenKind(llvm::raw_ostream &OS, tok Kind) {

} // end of anonymous namespace

unsigned RawSyntax::NextFreeNodeId = 1;

RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
SourcePresence Presence, bool ManualMemory) {
SourcePresence Presence, bool ManualMemory,
llvm::Optional<unsigned> NodeId) {
assert(Kind != SyntaxKind::Token &&
"'token' syntax node must be constructed with dedicated constructor");
if (NodeId.hasValue()) {
this->NodeId = NodeId.getValue();
NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
} else {
this->NodeId = NextFreeNodeId++;
}
Copy link
Member

@rintaro rintaro Jul 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the above comment (Receive unsigned NodeId) is true, you could factor this out:

/// Resolves the ID for a newly created node. A \p NodeId of 0 requests the
/// next free ID; any other value is used as-is and the free-ID counter is
/// moved past it.
static unsigned claimNodeId(unsigned NodeId) {
  if (NodeId != 0) {
    // Keep the counter ahead of every explicitly supplied ID.
    NextFreeNodeId = std::max(NextFreeNodeId, NodeId + 1);
    return NodeId;
  }
  return NextFreeNodeId++;
}

then call it from here and token constructor.

this->NodeId = claimNodeId(NodeId);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above.

Bits.Common.Kind = unsigned(Kind);
Bits.Common.Presence = unsigned(Presence);
Bits.Common.ManualMemory = unsigned(ManualMemory);
Expand All @@ -92,7 +101,14 @@ RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
RawSyntax::RawSyntax(tok TokKind, OwnedString Text,
ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia,
SourcePresence Presence, bool ManualMemory) {
SourcePresence Presence, bool ManualMemory,
llvm::Optional<unsigned> NodeId) {
if (NodeId.hasValue()) {
this->NodeId = NodeId.getValue();
NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
} else {
this->NodeId = NextFreeNodeId++;
}
Bits.Common.Kind = unsigned(SyntaxKind::Token);
Bits.Common.Presence = unsigned(Presence);
Bits.Common.ManualMemory = unsigned(ManualMemory);
Expand Down Expand Up @@ -126,25 +142,28 @@ RawSyntax::~RawSyntax() {
}

/// Creates a raw "layout" node of kind \p Kind with the children in
/// \p Layout. Storage is taken from \p Arena when one is given and from the
/// global operator new otherwise; the node is then constructed in place.
/// NOTE(review): this hunk interleaves two parameter-list tails and two
/// return statements; presumably the variants that mention \p NodeId are the
/// post-change lines — confirm against the real file.
RC<RawSyntax> RawSyntax::make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
SourcePresence Presence, SyntaxArena *Arena) {
SourcePresence Presence, SyntaxArena *Arena,
llvm::Optional<unsigned> NodeId) {
// Size the allocation for Layout.size() RC<RawSyntax> entries and zero
// OwnedString / TriviaPiece entries.
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString, TriviaPiece>(
Layout.size(), 0, 0);
void *data = Arena ? Arena->AllocateRawSyntax(size, alignof(RawSyntax))
: ::operator new(size);
// bool(Arena) is handed to the constructor as its ManualMemory argument.
return RC<RawSyntax>(new (data)
RawSyntax(Kind, Layout, Presence, bool(Arena)));
return RC<RawSyntax>(
new (data) RawSyntax(Kind, Layout, Presence, bool(Arena), NodeId));
}

/// Creates a raw "token" node of kind \p TokKind with text \p Text and the
/// given leading and trailing trivia. Storage is taken from \p Arena when one
/// is given and from the global operator new otherwise; the node is then
/// constructed in place.
/// NOTE(review): this hunk interleaves two parameter-list tails and two
/// return statements; presumably the variants that mention \p NodeId are the
/// post-change lines — confirm against the real file.
RC<RawSyntax> RawSyntax::make(tok TokKind, OwnedString Text,
ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia,
SourcePresence Presence, SyntaxArena *Arena) {
SourcePresence Presence, SyntaxArena *Arena,
llvm::Optional<unsigned> NodeId) {
// Size the allocation for zero RC<RawSyntax> entries, one OwnedString and
// one TriviaPiece per leading and trailing trivia piece.
auto size = totalSizeToAlloc<RC<RawSyntax>, OwnedString, TriviaPiece>(
0, 1, LeadingTrivia.size() + TrailingTrivia.size());
void *data = Arena ? Arena->AllocateRawSyntax(size, alignof(RawSyntax))
: ::operator new(size);
// bool(Arena) is handed to the constructor as its ManualMemory argument.
return RC<RawSyntax>(new (data) RawSyntax(
TokKind, Text, LeadingTrivia, TrailingTrivia, Presence, bool(Arena)));
return RC<RawSyntax>(new (data) RawSyntax(TokKind, Text, LeadingTrivia,
TrailingTrivia, Presence,
bool(Arena), NodeId));
}

RC<RawSyntax> RawSyntax::append(RC<RawSyntax> NewLayoutElement) const {
Expand Down
Loading