Skip to content

Move Syntax Gyb Infrastructure Into swift-syntax #663

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Sources/SwiftParser/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ There are a number of implementation tasks involved in realizing the parser, whi
* [ ] Tooling and automation
* [ ] Improve test harness to easily check the expected parse trees + diagnostics
* [ ] Migrate [attribute definitions](https://github.com/apple/swift/blob/main/include/swift/AST/Attr.def) from C++ to Swift
* [ ] Migrate [grammar definition](https://github.com/apple/swift/tree/main/utils/gyb_syntax_support) to the swift-syntax repository
* [x] Migrate [grammar definition](https://github.com/apple/swift/tree/main/utils/gyb_syntax_support) to the swift-syntax repository
* [ ] Migrate grammar definition from Python to Swift
* [ ] Replace uses of gyb with SwiftSyntax-based generation
* [ ] Auditing the grammar to ensure that it accurately represents the Swift grammar
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from .Child import Child
from .Node import Node # noqa: I201

# Syntax node definitions for the arguments of `@available` attributes.
# Each Node(...) entry declares one node kind; the comment directly above an
# entry gives the grammar production that the entry models.
AVAILABILITY_NODES = [
# availability-spec-list -> availability-entry availability-spec-list?
# NOTE(review): the diagnostic name below says '@availability' while every
# other node in this list says '@available' -- confirm which is intended.
Node('AvailabilitySpecList', name_for_diagnostics="'@availability' arguments",
kind='SyntaxCollection', element='AvailabilityArgument'),

# Wrapper for all the different entries that may occur inside @available
# availability-entry -> '*' ','?
# | identifier ','?
# | availability-version-restriction ','?
# | availability-labeled-argument ','?
Node('AvailabilityArgument', name_for_diagnostics="'@available' argument",
kind='Syntax',
description='''
A single argument to an `@available` argument like `*`, `iOS 10.1`,
or `message: "This has been deprecated"`.
''',
children=[
# Exactly one of the four alternatives listed in node_choices.
Child('Entry', kind='Syntax',
description='The actual argument',
node_choices=[
Child('Star', kind='SpacedBinaryOperatorToken',
text_choices=['*']),
Child('IdentifierRestriction',
kind='IdentifierToken'),
Child('AvailabilityVersionRestriction',
kind='AvailabilityVersionRestriction'),
Child('AvailabilityLabeledArgument',
kind='AvailabilityLabeledArgument'),
]),
Child('TrailingComma', kind='CommaToken', is_optional=True,
description='''
A trailing comma if the argument is followed by another
argument
'''),
]),

# Representation of 'deprecated: 2.3', 'message: "Hello world"' etc.
# availability-labeled-argument -> identifier ':' string-literal
# | identifier ':' version-tuple
# NOTE(review): the description below reads "A argument"; it should
# probably read "An argument".
Node('AvailabilityLabeledArgument', name_for_diagnostics="'@available' argument",
kind='Syntax',
description='''
A argument to an `@available` attribute that consists of a label and
a value, e.g. `message: "This has been deprecated"`.
''',
children=[
Child('Label', kind='IdentifierToken',
description='The label of the argument'),
Child('Colon', kind='ColonToken',
description='The colon separating label and value'),
# The value is either a string literal token or a version tuple node.
Child('Value', kind='Syntax',
node_choices=[
Child('String', 'StringLiteralToken'),
Child('Version', 'VersionTuple'),
], description='The value of this labeled argument',),
]),

# Representation for 'iOS 10', 'swift 3.4' etc.
# availability-version-restriction -> identifier version-tuple?
Node('AvailabilityVersionRestriction', name_for_diagnostics="'@available' argument",
kind='Syntax',
description='''
An argument to `@available` that restricts the availability on a
certain platform to a version, e.g. `iOS 10` or `swift 3.4`.
''',
children=[
# The platform identifier is classified as a keyword for syntax coloring.
Child('Platform', kind='IdentifierToken',
classification='Keyword',
description='''
The name of the OS on which the availability should be
restricted or 'swift' if the availability should be
restricted based on a Swift version.
'''),
Child('Version', kind='VersionTuple', is_optional=True),
]),

# version-tuple -> integer-literal
# | float-literal
# | float-literal '.' integer-literal
Node('VersionTuple', name_for_diagnostics='version tuple', kind='Syntax',
description='''
A version number of the form major.minor.patch in which the minor
and patch part may be omitted.
''',
children=[
# "major" alone is an integer literal; "major.minor" is a single
# floating literal token, so both alternatives share this child.
Child('MajorMinor', kind='Syntax',
node_choices=[
Child('Major', kind='IntegerLiteralToken'),
Child('MajorMinor', kind='FloatingLiteralToken')
], description='''
In case the version consists only of the major version, an
integer literal that specifies the major version. In case
the version consists of major and minor version number, a
floating literal in which the decimal part is interpreted
as the minor version.
'''),
Child('PatchPeriod', kind='PeriodToken', is_optional=True,
description='''
If the version contains a patch number, the period
separating the minor from the patch number.
'''),
Child('PatchVersion', kind='IntegerLiteralToken',
is_optional=True, description='''
The patch version if specified.
'''),
]),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# flake8: noqa I201
from .Classification import classification_by_name
from .Token import SYNTAX_TOKEN_MAP
from .kinds import SYNTAX_BASE_KINDS, kind_to_type, lowercase_first_word


class Child(object):
    """
    A child of a node, that may be declared optional or a token with a
    restricted subset of acceptable kinds or texts.
    """

    def __init__(self, name, kind, description=None, is_optional=False,
                 token_choices=None, text_choices=None, node_choices=None,
                 collection_element_name=None,
                 classification=None, force_classification=False,
                 is_indented=False, requires_leading_newline=False):
        """
        Create a child description.

        name -- the child's name; `swift_name` is derived from it with
            lowercase_first_word.
        kind -- the syntax kind of the child. A kind ending in "Token"
            marks the child as a token and is looked up in
            SYNTAX_TOKEN_MAP.
        description -- optional documentation text for the child.
        is_optional -- whether the child may be absent.
        token_choices -- optional list of token kind names accepted for
            this child; every name must be a key of SYNTAX_TOKEN_MAP
            (an unknown name raises KeyError).
        text_choices -- optional list of valid token texts.
        node_choices -- optional list of Child objects describing the
            alternatives for this child. Must be empty or contain at
            least two entries; the entries may be neither optional nor
            have node choices of their own.
        collection_element_name -- stored unchanged on the instance.
        is_indented, requires_leading_newline -- stored unchanged on the
            instance.

        If a classification is passed, it specifies the color identifiers in
        that subtree should inherit for syntax coloring. Must be a member of
        SyntaxClassification in SyntaxClassifier.h.gyb
        If force_classification is also set to true, all child nodes (not only
        identifiers) inherit the syntax classification.
        """
        self.name = name
        self.swift_name = lowercase_first_word(name)
        self.syntax_kind = kind
        self.description = description
        self.swift_syntax_kind = lowercase_first_word(self.syntax_kind)
        self.type_name = kind_to_type(self.syntax_kind)
        self.collection_element_name = collection_element_name
        # The classification is given by name and resolved to its object.
        self.classification = classification_by_name(classification)
        self.force_classification = force_classification
        self.is_indented = is_indented
        self.requires_leading_newline = requires_leading_newline

        # If the kind ends with "Token", the child is considered a token
        # node. Grab the existing reference to that token from the global
        # map; for non-token kinds both attributes end up as None.
        self.token_kind = \
            self.syntax_kind if self.syntax_kind.endswith("Token") else None
        self.token = SYNTAX_TOKEN_MAP.get(self.token_kind)

        self.is_optional = is_optional

        # A restricted set of token kinds that will be accepted for this
        # child. The child's own token (if any) always comes first, which
        # is what main_token() relies on.
        self.token_choices = []
        if self.token:
            self.token_choices.append(self.token)
        for choice in token_choices or []:
            token = SYNTAX_TOKEN_MAP[choice]
            self.token_choices.append(token)

        # A list of valid text for tokens, if specified.
        # This will force validation logic to check the text passed into the
        # token against the choices.
        self.text_choices = text_choices or []

        # A list of valid choices for a child
        self.node_choices = node_choices or []

        # Check the choices are either empty or multiple: a single choice
        # should be expressed through the child's kind instead.
        assert len(self.node_choices) != 1, \
            "child %s must have either no node choices or more than one" \
            % self.name

        # Check node choices are well-formed
        for choice in self.node_choices:
            assert not choice.is_optional, \
                "node choice %s cannot be optional" % choice.name
            assert not choice.node_choices, \
                "node choice %s cannot have further choices" % choice.name

    def is_token(self):
        """
        Returns true if this child has a token kind.
        """
        return self.token_kind is not None

    def main_token(self):
        """
        Returns the first choice from the token_choices if there are any,
        otherwise returns None.
        """
        if self.token_choices:
            return self.token_choices[0]
        return None

    def is_unexpected_nodes(self):
        """
        Returns true if this child's syntax kind is 'UnexpectedNodes'.
        """
        return self.syntax_kind == 'UnexpectedNodes'
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from .Utils import error
from .kinds import lowercase_first_word # noqa: I201


class SyntaxClassification(object):
    '''
    Represents a classification a token can receive for syntax highlighting.
    '''

    def __init__(self, name, description):
        '''
        name -- the classification's name; `swift_name` is derived from it
            with lowercase_first_word.
        description -- documentation text for the classification.
        '''
        self.name = name
        self.swift_name = lowercase_first_word(name)
        self.description = description

    def __repr__(self):
        '''
        Returns a short representation naming the classification, so that
        instances are identifiable in logs and assertion messages.
        '''
        return '%s(%r)' % (type(self).__name__, self.name)


# All classifications a token can receive for syntax highlighting.
# classification_by_name() resolves names against this list, so every
# classification name used elsewhere must have an entry here.
SYNTAX_CLASSIFICATIONS = [
    SyntaxClassification('None', description='''
The token should not receive syntax coloring.
'''),
    SyntaxClassification('Keyword', description='''
A Swift keyword, including contextual keywords.
'''),
    SyntaxClassification('Identifier', description='''
A generic identifier.
'''),
    SyntaxClassification('TypeIdentifier', description='''
An identifier referring to a type.
'''),
    SyntaxClassification('DollarIdentifier', description='''
An identifier starting with `$` like `$0`.
'''),
    SyntaxClassification('IntegerLiteral', description='''
An integer literal.
'''),
    SyntaxClassification('FloatingLiteral', description='''
A floating point literal.
'''),
    SyntaxClassification('StringLiteral', description='''
A string literal including multiline string literals.
'''),
    SyntaxClassification('StringInterpolationAnchor', description='''
The opening and closing parenthesis of string interpolation.
'''),
    SyntaxClassification('PoundDirectiveKeyword', description='''
A `#` keyword like `#warning`.
'''),
    SyntaxClassification('BuildConfigId', description='''
A build configuration directive like `#if`, `#elseif`, `#else`.
'''),
    SyntaxClassification('Attribute', description='''
An attribute starting with an `@`.
'''),
    SyntaxClassification('ObjectLiteral', description='''
An image, color, etc. literal.
'''),
    SyntaxClassification('EditorPlaceholder', description='''
An editor placeholder of the form `<#content#>`
'''),
    SyntaxClassification('LineComment', description='''
A line comment starting with `//`.
'''),
    SyntaxClassification('DocLineComment', description='''
A doc line comment starting with `///`.
'''),
    # A plain block comment opens with `/*`; the `/**` opener belongs to
    # the doc block comment below.
    SyntaxClassification('BlockComment', description='''
A block comment starting with `/*` and ending with `*/`.
'''),
    SyntaxClassification('DocBlockComment', description='''
A doc block comment starting with `/**` and ending with `*/`.
'''),
]


def classification_by_name(classification_name):
    """
    Look up a classification in SYNTAX_CLASSIFICATIONS by its name.

    Returns None when classification_name is None, otherwise the first
    entry whose `name` equals classification_name. If no entry matches,
    the problem is reported through error().
    """
    if classification_name is None:
        return None
    for classification in SYNTAX_CLASSIFICATIONS:
        if classification.name == classification_name:
            return classification
    # NOTE(review): error() is imported from .Utils and presumably aborts;
    # if it ever returns, this function falls through and returns None.
    error("Unknown syntax classification '%s'" % classification_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from .Child import Child
from .Node import Node # noqa: I201

# Syntax node definitions that are not specific to one area of the grammar:
# the base kinds, their 'Unknown' and 'Missing' variants, code blocks and the
# collection of unexpected nodes.
COMMON_NODES = [
# Base kinds; each is declared with kind='Syntax'.
Node('Decl', name_for_diagnostics='declaration', kind='Syntax'),
Node('Expr', name_for_diagnostics='expression', kind='Syntax'),
Node('Stmt', name_for_diagnostics='statement', kind='Syntax'),
Node('Type', name_for_diagnostics='type', kind='Syntax'),
Node('Pattern', name_for_diagnostics='pattern', kind='Syntax'),
# One 'Unknown' node per base kind, sharing the base kind's diagnostic name.
Node('UnknownDecl', name_for_diagnostics='declaration', kind='Decl'),
Node('UnknownExpr', name_for_diagnostics='expression', kind='Expr'),
Node('UnknownStmt', name_for_diagnostics='statement', kind='Stmt'),
Node('UnknownType', name_for_diagnostics='type', kind='Type'),
Node('UnknownPattern', name_for_diagnostics='pattern', kind='Pattern'),
# 'Missing' nodes: a generic one plus one per base kind. Presumably these
# stand in for nodes absent from the source -- confirm against the parser.
Node('Missing', name_for_diagnostics=None, kind='Syntax'),
# MissingDecl is the only 'Missing' node with children: optional attribute
# and modifier lists.
Node('MissingDecl', name_for_diagnostics='declaration', kind='Decl', children=[
Child('Attributes', kind='AttributeList',
collection_element_name='Attribute', is_optional=True),
Child('Modifiers', kind='ModifierList',
collection_element_name='Modifier', is_optional=True),
]),
Node('MissingExpr', name_for_diagnostics='expression', kind='Expr'),
Node('MissingStmt', name_for_diagnostics='statement', kind='Stmt'),
Node('MissingType', name_for_diagnostics='type', kind='Type'),
Node('MissingPattern', name_for_diagnostics='pattern', kind='Pattern'),

# code-block-item = (decl | stmt | expr | token-list | non-empty-token-list)
# ';'?
Node('CodeBlockItem', name_for_diagnostics=None, kind='Syntax',
omit_when_empty=True,
description="""
A CodeBlockItem is any Syntax node that appears on its own line inside
a CodeBlock.
""",
children=[
Child('Item', kind='Syntax',
description="The underlying node inside the code block.",
node_choices=[
Child('Decl', kind='Decl'),
Child('Stmt', kind='Stmt'),
Child('Expr', kind='Expr'),
Child('TokenList', kind='TokenList'),
Child('NonEmptyTokenList', kind='NonEmptyTokenList'),
]),
Child('Semicolon', kind='SemicolonToken',
description="""
If present, the trailing semicolon at the end of the item.
""",
is_optional=True),
Child('ErrorTokens', kind='Syntax', is_optional=True),
]),

# code-block-item-list -> code-block-item code-block-item-list?
Node('CodeBlockItemList', name_for_diagnostics=None,
kind='SyntaxCollection', element='CodeBlockItem',
elements_separated_by_newline=True),

# code-block -> '{' code-block-item-list '}'
Node('CodeBlock', name_for_diagnostics=None, kind='Syntax',
traits=['Braced', 'WithStatements'],
children=[
Child('LeftBrace', kind='LeftBraceToken'),
# The statements are marked as indented and the closing brace as
# requiring a leading newline.
Child('Statements', kind='CodeBlockItemList',
collection_element_name='Statement', is_indented=True),
Child('RightBrace', kind='RightBraceToken',
requires_leading_newline=True),
]),

# unexpected-nodes -> syntax unexpected-nodes?
Node('UnexpectedNodes', name_for_diagnostics=None, kind='SyntaxCollection',
element='Syntax',
description='''
A collection of syntax nodes that occurred in the source code but
could not be used to form a valid syntax tree.
'''),
]
Loading