Skip to content

[Syntax] Unify RawSyntax and RawTokenSyntax using union and TrailingObjects #13990

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/swift/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ namespace swift {

namespace syntax {
class AbsolutePosition;
struct RawTokenSyntax;
class RawSyntax;
enum class SyntaxKind;
class TypeSyntax;
}// end of syntax namespace
Expand Down Expand Up @@ -1435,7 +1435,7 @@ bool isKeywordPossibleDeclStart(const Token &Tok);

/// \brief Lex and return a vector of `TokenSyntax` tokens, which include
/// leading and trailing trivia.
std::vector<std::pair<RC<syntax::RawTokenSyntax>,
std::vector<std::pair<RC<syntax::RawSyntax>,
syntax::AbsolutePosition>>
tokenizeWithTrivia(const LangOptions &LangOpts,
const SourceManager &SM,
Expand Down
272 changes: 193 additions & 79 deletions include/swift/Syntax/RawSyntax.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@
#ifndef SWIFT_SYNTAX_RAWSYNTAX_H
#define SWIFT_SYNTAX_RAWSYNTAX_H

#include "swift/Basic/InlineBitfield.h"
#include "swift/Syntax/References.h"
#include "swift/Syntax/SyntaxKind.h"
#include "swift/Syntax/TokenKinds.h"
#include "swift/Syntax/Trivia.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"

#include <vector>
Expand All @@ -53,16 +56,17 @@ using llvm::StringRef;
/// Debug check for the child of \p Raw at Cursor::CursorName: asserts that the
/// child is a token and, when that child is present, that its token kind
/// equals one of the kinds listed in the variadic arguments.
/// The body is wrapped in a `({ ... })` statement expression.
#define syntax_assert_child_token(Raw, CursorName, ...) \
({ \
bool __Found = false; \
auto __Token = cast<RawTokenSyntax>(Raw->getChild(Cursor::CursorName)); \
auto __Token = Raw->getChild(Cursor::CursorName); \
assert(__Token->isToken()); \
if (__Token->isPresent()) { \
for (auto Token : {__VA_ARGS__}) { \
if (__Token->getTokenKind() == Token) { \
__Found = true; \
break; \
} \
} \
assert(__Found && "invalid token supplied for " \
#CursorName ", expected one of {" #__VA_ARGS__ "}"); \
assert(__Found && "invalid token supplied for " #CursorName \
", expected one of {" #__VA_ARGS__ "}"); \
} \
})
#else
Expand All @@ -72,18 +76,19 @@ using llvm::StringRef;
#ifndef NDEBUG
/// Debug check for the child of \p Raw at Cursor::CursorName: asserts that the
/// child is a token and, when that child is present, that its token kind is
/// \p TokenKind and its text equals one of the strings listed in the variadic
/// arguments.
/// The body is wrapped in a `({ ... })` statement expression.
#define syntax_assert_child_token_text(Raw, CursorName, TokenKind, ...) \
({ \
bool __Found = false; \
auto __Child = cast<RawTokenSyntax>(Raw->getChild(Cursor::CursorName)); \
if (__Child->isPresent()) { \
bool __Found = false; \
auto __Child = Raw->getChild(Cursor::CursorName); \
assert(__Child->isToken()); \
if (__Child->isPresent()) { \
assert(__Child->getTokenKind() == TokenKind); \
for (auto __Text : {__VA_ARGS__}) { \
if (__Child->getText() == __Text) { \
if (__Child->getTokenText() == __Text) { \
__Found = true; \
break; \
} \
} \
assert(__Found && "invalid text supplied for " \
#CursorName ", expected one of {" #__VA_ARGS__ "}"); \
assert(__Found && "invalid text supplied for " #CursorName \
", expected one of {" #__VA_ARGS__ "}"); \
} \
})
#else
Expand Down Expand Up @@ -158,22 +163,6 @@ class AbsolutePosition {
}
}

/// Use some character as a reference for adding to the absolute position,
/// taking note of newlines, etc.
/// Take care that consecutive call of this function with '\r' and '\n'
/// causes increase of 2 Line but desirable result may be 1 Line.
void addCharacter(char C) {
switch (C) {
case '\n':
case '\r':
addNewlines(1, 1);
break;
default:
addColumns(1);
break;
}
}

/// Get the line number of this position.
uint32_t getLine() const { return Line; }

Expand Down Expand Up @@ -218,95 +207,220 @@ struct SyntaxPrintOptions {
/// RawSyntax - the strictly immutable, shared backing nodes for all syntax.
///
/// This is implementation detail - do not expose it in public API.
struct RawSyntax : public llvm::ThreadSafeRefCountedBase<RawSyntax> {

using LayoutList = std::vector<RC<RawSyntax>>;

/// The kind of syntax this node represents.
const SyntaxKind Kind;

/// The "layout" of the node - representing the children, or the terms
/// in the production of the grammar.
const LayoutList Layout;

/// Whether this piece of syntax was actually present in the source.
const SourcePresence Presence;
class RawSyntax final
: public llvm::ThreadSafeRefCountedBase<RawSyntax>,
private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>, OwnedString,
TriviaPiece> {
friend TrailingObjects;

/// Packed per-node state. Every member of this union aliases the same
/// storage: `Clear` views it as one 64-bit word, the first anonymous struct
/// holds the fields shared by all nodes, and the two later structs each skip
/// over the shared bits with an unnamed bit-field before declaring the fields
/// that only "layout" nodes or only "token" nodes use.
union {
// Whole-word view of the storage.
// NOTE(review): presumably written to zero every bit at once — confirm
// against the constructors, which are not defined in this header.
uint64_t Clear;
struct {
/// The kind of syntax this node represents.
unsigned Kind : bitmax(NumSyntaxKindBits, 8);
/// Whether this piece of syntax was actually present in the source.
unsigned Presence : 1;
};
// Number of bits taken by the common fields above (Kind plus Presence).
enum { NumRawSyntaxBits = bitmax(NumSyntaxKindBits, 8) + 1 };

// For "layout" nodes.
struct {
// Unnamed padding that reserves at least 32 bits for the common fields.
uint64_t : bitmax(NumRawSyntaxBits, 32);
/// Number of children this "layout" node has.
unsigned NumChildren : 32;
};

// For "token" nodes.
struct {
// Unnamed padding that reserves at least 16 bits for the common fields.
uint64_t : bitmax(NumRawSyntaxBits, 16);
/// The kind of token this "token" node represents.
unsigned TokenKind : 16;
/// Number of leading trivia pieces.
unsigned NumLeadingTrivia : 16;
/// Number of trailing trivia pieces.
unsigned NumTrailingTrivia : 16;
};
} Bits;

/// Number of trailing child references: the child count for a layout node,
/// none for a token node.
size_t numTrailingObjects(OverloadToken<RC<RawSyntax>>) const {
  if (isToken())
    return 0;
  return Bits.NumChildren;
}

/// Number of trailing text strings: exactly one for a token node, none for a
/// layout node.
size_t numTrailingObjects(OverloadToken<OwnedString>) const {
  if (isToken())
    return 1;
  return 0;
}

/// Number of trailing trivia pieces: leading plus trailing trivia for a token
/// node, none for a layout node.
size_t numTrailingObjects(OverloadToken<TriviaPiece>) const {
  if (!isToken())
    return 0;
  return Bits.NumLeadingTrivia + Bits.NumTrailingTrivia;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a super big deal but we shouldn't need to define numTrailingObjects for the last trailing object type. It will never get called.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but I'd rather keep this so we can see how we calculate the number of trailing objects.


/// Create a piece of raw syntax.
RawSyntax(const SyntaxKind Kind, const std::vector<RC<RawSyntax>> Layout,
const SourcePresence Presence)
: Kind(Kind), Layout(Layout), Presence(Presence) {}
RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
SourcePresence Presence);
RawSyntax(tok TokKind, OwnedString Text, SourcePresence Presence,
ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia);

virtual ~RawSyntax() = default;
public:
~RawSyntax();

/// Returns a raw syntax node of the given Kind, specified Layout,
/// and source presence.
static RC<RawSyntax> make(const SyntaxKind Kind, const LayoutList Layout,
const SourcePresence Presence) {
return RC<RawSyntax>{new RawSyntax{Kind, Layout, Presence}};
/// \name Factory methods.
/// @{

/// Make a raw "layout" syntax node.
static RC<RawSyntax> make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
SourcePresence Presence);

/// Make a raw "token" syntax node.
static RC<RawSyntax> make(tok TokKind, OwnedString Text,
SourcePresence Presence,
ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia);

/// Make a missing raw "layout" syntax node.
static RC<RawSyntax> missing(SyntaxKind Kind) {
return make(Kind, {}, SourcePresence::Missing);
}

/// Returns a raw syntax node of the given Kind, marked as missing.
static RC<RawSyntax> missing(const SyntaxKind Kind) {
return make(Kind, {}, SourcePresence::Missing);
/// Make a missing raw "token" syntax node.
static RC<RawSyntax> missing(tok TokKind, OwnedString Text) {
return make(TokKind, Text, SourcePresence::Missing,
ArrayRef<TriviaPiece>{}, ArrayRef<TriviaPiece>{});
}

/// Get a child based on a particular node's "Cursor", indicating
/// the position of the terms in the production of the Swift grammar.
template <typename CursorType> RC<RawSyntax> getChild(CursorType C) const {
return Layout[cursorIndex(C)];
/// @}

SourcePresence getPresence() const {
return static_cast<SourcePresence>(Bits.Presence);
}

/// Return the kind of syntax this node represents, decoded from the packed
/// Kind bits.
SyntaxKind getKind() const {
  const auto RawKind = Bits.Kind;
  return static_cast<SyntaxKind>(RawKind);
}

/// Returns true if the node is "missing" in the source (i.e. it was
/// expected (or optional) but not written).
bool isMissing() const { return Presence == SourcePresence::Missing; }
bool isMissing() const { return getPresence() == SourcePresence::Missing; }

/// Returns true if the node is "present" in the source.
bool isPresent() const {
return Presence == SourcePresence::Present;
}
bool isPresent() const { return getPresence() == SourcePresence::Present; }

/// Returns true if this raw syntax node is some kind of declaration.
bool isDecl() const { return isDeclKind(Kind); }
bool isDecl() const { return isDeclKind(getKind()); }

/// Returns true if this raw syntax node is some kind of type syntax.
bool isType() const { return isTypeKind(Kind); }
bool isType() const { return isTypeKind(getKind()); }

/// Returns true if this raw syntax node is some kind of statement.
bool isStmt() const { return isStmtKind(Kind); }
bool isStmt() const { return isStmtKind(getKind()); }

/// Returns true if this raw syntax node is some kind of expression.
bool isExpr() const { return isExprKind(Kind); }
bool isExpr() const { return isExprKind(getKind()); }

/// Returns true if this raw syntax node is some kind of pattern.
bool isPattern() const { return isPatternKind(Kind); }
bool isPattern() const { return isPatternKind(getKind()); }

/// Return true if this raw syntax node is an unknown node.
bool isUnknown() const { return isUnknownKind(getKind()); }

/// Return true if this raw syntax node is a token.
bool isToken() const { return isTokenKind(Kind); }
bool isToken() const { return isTokenKind(getKind()); }

/// \name Getter routines for SyntaxKind::Token.
/// @{

bool isUnknown() const { return isUnknownKind(Kind); }
/// Return the token kind stored in this node. Asserts that the node is a
/// token.
tok getTokenKind() const {
  assert(isToken());
  const auto RawKind = Bits.TokenKind;
  return static_cast<tok>(RawKind);
}

/// Return the token's text, read from the trailing OwnedString. Asserts that
/// the node is a token.
StringRef getTokenText() const {
  assert(isToken());
  const OwnedString *Text = getTrailingObjects<OwnedString>();
  return Text->str();
}

/// Return the token's leading trivia: the first NumLeadingTrivia entries of
/// the trailing TriviaPiece storage. Asserts that the node is a token.
ArrayRef<TriviaPiece> getLeadingTrivia() const {
  assert(isToken());
  const TriviaPiece *First = getTrailingObjects<TriviaPiece>();
  return ArrayRef<TriviaPiece>(First, Bits.NumLeadingTrivia);
}
/// Return the token's trailing trivia: the NumTrailingTrivia entries that
/// follow the leading trivia in the trailing TriviaPiece storage. Asserts
/// that the node is a token.
ArrayRef<TriviaPiece> getTrailingTrivia() const {
  assert(isToken());
  const TriviaPiece *First =
      getTrailingObjects<TriviaPiece>() + Bits.NumLeadingTrivia;
  return ArrayRef<TriviaPiece>(First, Bits.NumTrailingTrivia);
}

/// Return \c true if this node is a token whose kind is \p K.
bool isToken(tok K) const {
  if (!isToken())
    return false;
  return getTokenKind() == K;
}

/// @}

/// \name Transform routines for "token" nodes.
/// @{

/// Build a new token node whose leading trivia is \p NewLeadingTrivia; the
/// token kind, text, presence and trailing trivia are taken from this node.
RC<RawSyntax>
withLeadingTrivia(ArrayRef<TriviaPiece> NewLeadingTrivia) const {
  const tok Kind = getTokenKind();
  const SourcePresence Presence = getPresence();
  return make(Kind, getTokenText(), Presence, NewLeadingTrivia,
              getTrailingTrivia());
}

/// Return a new token like this one, but with the pieces of
/// \p NewLeadingTrivia as its leading trivia. Forwards
/// NewLeadingTrivia.Pieces to another withLeadingTrivia overload.
RC<RawSyntax> withLeadingTrivia(Trivia NewLeadingTrivia) const {
return withLeadingTrivia(NewLeadingTrivia.Pieces);
}

/// Build a new token node whose trailing trivia is \p NewTrailingTrivia; the
/// token kind, text, presence and leading trivia are taken from this node.
RC<RawSyntax>
withTrailingTrivia(ArrayRef<TriviaPiece> NewTrailingTrivia) const {
  const tok Kind = getTokenKind();
  const SourcePresence Presence = getPresence();
  return make(Kind, getTokenText(), Presence, getLeadingTrivia(),
              NewTrailingTrivia);
}

/// Return a new token like this one, but with the pieces of
/// \p NewTrailingTrivia as its trailing trivia. Forwards
/// NewTrailingTrivia.Pieces to another withTrailingTrivia overload.
RC<RawSyntax> withTrailingTrivia(Trivia NewTrailingTrivia) const {
return withTrailingTrivia(NewTrailingTrivia.Pieces);
}

/// @}

/// \name Getter routines for "layout" nodes.
/// @{

/// Return the child nodes of this node. A token node has no children, so an
/// empty list is returned for it; otherwise the list views the NumChildren
/// trailing child references.
ArrayRef<RC<RawSyntax>> getLayout() const {
  if (!isToken())
    return ArrayRef<RC<RawSyntax>>(getTrailingObjects<RC<RawSyntax>>(),
                                   Bits.NumChildren);
  return ArrayRef<RC<RawSyntax>>();
}

/// Return the child at position \p Index of this node's layout. The index is
/// a node's "Cursor", indicating the position of the terms in the production
/// of the Swift grammar.
const RC<RawSyntax> &getChild(CursorIndex Index) const {
  const ArrayRef<RC<RawSyntax>> Children = getLayout();
  return Children[Index];
}

/// @}

/// \name Transform routines for "layout" nodes.
/// @{

/// Return a new raw syntax node with the given new layout element appended
/// to the end of the node's layout.
RC<RawSyntax> append(RC<RawSyntax> NewLayoutElement) const;

/// Return a new raw syntax node with the given new layout element replacing
/// another at some cursor position.
template <typename CursorType>
RC<RawSyntax>
replaceChild(CursorType C, RC<RawSyntax> NewLayoutElement) const {
LayoutList NewLayout;
replaceChild(CursorIndex Index, RC<RawSyntax> NewLayoutElement) const;

std::copy(Layout.begin(), Layout.begin() + cursorIndex(C),
std::back_inserter(NewLayout));
/// @}

NewLayout.push_back(NewLayoutElement);

std::copy(Layout.begin() + cursorIndex(C) + 1, Layout.end(),
std::back_inserter(NewLayout));

return RawSyntax::make(Kind, NewLayout, Presence);
}
/// Advance the provided AbsolutePosition by the full width of this node.
///
/// If this is a token node, returns the AbsolutePosition of the start of the
/// token's nontrivial text. Otherwise, returns the position of the first
/// token. If this contains no tokens, returns None.
llvm::Optional<AbsolutePosition>
accumulateAbsolutePosition(AbsolutePosition &Pos) const;

/// Print this piece of syntax recursively.
void print(llvm::raw_ostream &OS, SyntaxPrintOptions Opts) const;
Expand All @@ -315,7 +429,7 @@ struct RawSyntax : public llvm::ThreadSafeRefCountedBase<RawSyntax> {
void dump() const;

/// Dump this piece of syntax recursively.
void dump(llvm::raw_ostream &OS, unsigned Indent) const;
void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const;
};

} // end namespace syntax
Expand Down
Loading