Skip to content

Commit 3898244

Browse files
committed
Move Syntax Gyb Infrastructure Into swift-syntax
Take ownership of the infrastructure that generates the syntax nodes. This greatly simplifies the modal workflow for amending syntax nodes: only SwiftSyntax strictly needs an update, unless the change is drastic enough to affect the legacy parser or unit tests.
1 parent 20d083d commit 3898244

20 files changed

+4361
-1
lines changed

Sources/SwiftParser/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ There are a number of implementation tasks involved in realizing the parser, whi
4747
* [ ] Tooling and automation
4848
* [ ] Improve test harness to easily check the expected parse trees + diagnostics
4949
* [ ] Migrate [attribute definitions](https://github.com/apple/swift/blob/main/include/swift/AST/Attr.def) from C++ to Swift
50-
* [ ] Migrate [grammar definition](https://github.com/apple/swift/tree/main/utils/gyb_syntax_support) to the swift-syntax repository
50+
* [x] Migrate [grammar definition](https://github.com/apple/swift/tree/main/utils/gyb_syntax_support) to the swift-syntax repository
5151
* [ ] Migrate grammar definition from Python to Swift
5252
* [ ] Replace uses of gyb with SwiftSyntax-based generation
5353
* [ ] Auditing the grammar to ensure that it accurately represents the Swift grammar

Sources/generate-swift-syntax-builder/gyb_syntax_support/AttributeNodes.py

Lines changed: 463 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from .Child import Child
2+
from .Node import Node # noqa: I201
3+
4+
# Syntax node definitions for the arguments written inside an `@available`
# attribute. Each entry is a `Node`; the `description` strings are
# user-facing documentation text, so their wording matters.
AVAILABILITY_NODES = [
    # availability-spec-list -> availability-entry availability-spec-list?
    Node('AvailabilitySpecList', kind='SyntaxCollection',
         element='AvailabilityArgument'),

    # Wrapper for all the different entries that may occur inside @available
    # availability-entry -> '*' ','?
    #                     | identifier ','?
    #                     | availability-version-restriction ','?
    #                     | availability-labeled-argument ','?
    Node('AvailabilityArgument', kind='Syntax',
         description='''
         A single argument to an `@available` attribute like `*`, `iOS 10.1`,
         or `message: "This has been deprecated"`.
         ''',
         children=[
             Child('Entry', kind='Syntax',
                   description='The actual argument',
                   node_choices=[
                       Child('Star', kind='SpacedBinaryOperatorToken',
                             text_choices=['*']),
                       Child('IdentifierRestriction',
                             kind='IdentifierToken'),
                       Child('AvailabilityVersionRestriction',
                             kind='AvailabilityVersionRestriction'),
                       Child('AvailabilityLabeledArgument',
                             kind='AvailabilityLabeledArgument'),
                   ]),
             Child('TrailingComma', kind='CommaToken', is_optional=True,
                   description='''
                   A trailing comma if the argument is followed by another
                   argument
                   '''),
         ]),

    # Representation of 'deprecated: 2.3', 'message: "Hello world"' etc.
    # availability-labeled-argument -> identifier ':' string-literal
    #                                | identifier ':' version-tuple
    Node('AvailabilityLabeledArgument', kind='Syntax',
         description='''
         An argument to an `@available` attribute that consists of a label and
         a value, e.g. `message: "This has been deprecated"`.
         ''',
         children=[
             Child('Label', kind='IdentifierToken',
                   description='The label of the argument'),
             Child('Colon', kind='ColonToken',
                   description='The colon separating label and value'),
             Child('Value', kind='Syntax',
                   node_choices=[
                       Child('String', 'StringLiteralToken'),
                       Child('Version', 'VersionTuple'),
                   ], description='The value of this labeled argument',),
         ]),

    # Representation for 'iOS 10', 'swift 3.4' etc.
    # availability-version-restriction -> identifier version-tuple
    Node('AvailabilityVersionRestriction', kind='Syntax',
         description='''
         An argument to `@available` that restricts the availability on a
         certain platform to a version, e.g. `iOS 10` or `swift 3.4`.
         ''',
         children=[
             Child('Platform', kind='IdentifierToken',
                   classification='Keyword',
                   description='''
                   The name of the OS on which the availability should be
                   restricted or 'swift' if the availability should be
                   restricted based on a Swift version.
                   '''),
             Child('Version', kind='VersionTuple', is_optional=True),
         ]),

    # version-tuple -> integer-literal
    #                | float-literal
    #                | float-literal '.' integer-literal
    Node('VersionTuple', kind='Syntax',
         description='''
         A version number of the form major.minor.patch in which the minor
         and patch part may be omitted.
         ''',
         children=[
             Child('MajorMinor', kind='Syntax',
                   node_choices=[
                       Child('Major', kind='IntegerLiteralToken'),
                       Child('MajorMinor', kind='FloatingLiteralToken')
                   ], description='''
                   In case the version consists only of the major version, an
                   integer literal that specifies the major version. In case
                   the version consists of major and minor version number, a
                   floating literal in which the decimal part is interpreted
                   as the minor version.
                   '''),
             Child('PatchPeriod', kind='PeriodToken', is_optional=True,
                   description='''
                   If the version contains a patch number, the period
                   separating the minor from the patch number.
                   '''),
             Child('PatchVersion', kind='IntegerLiteralToken',
                   is_optional=True, description='''
                   The patch version if specified.
                   '''),
         ]),
]
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# flake8: noqa I201
2+
from .Classification import classification_by_name
3+
from .Token import SYNTAX_TOKEN_MAP
4+
from .kinds import SYNTAX_BASE_KINDS, kind_to_type, lowercase_first_word
5+
6+
7+
class Child(object):
8+
"""
9+
A child of a node, that may be declared optional or a token with a
10+
restricted subset of acceptable kinds or texts.
11+
"""
12+
def __init__(self, name, kind, description=None, is_optional=False,
13+
token_choices=None, text_choices=None, node_choices=None,
14+
collection_element_name=None,
15+
classification=None, force_classification=False,
16+
is_indented=False, requires_leading_newline=False):
17+
"""
18+
If a classification is passed, it specifies the color identifiers in
19+
that subtree should inherit for syntax coloring. Must be a member of
20+
SyntaxClassification in SyntaxClassifier.h.gyb
21+
If force_classification is also set to true, all child nodes (not only
22+
identifiers) inherit the syntax classification.
23+
"""
24+
self.name = name
25+
self.swift_name = lowercase_first_word(name)
26+
self.syntax_kind = kind
27+
self.description = description
28+
self.swift_syntax_kind = lowercase_first_word(self.syntax_kind)
29+
self.type_name = kind_to_type(self.syntax_kind)
30+
self.collection_element_name = collection_element_name
31+
self.classification = classification_by_name(classification)
32+
self.force_classification = force_classification
33+
self.is_indented = is_indented
34+
self.requires_leading_newline = requires_leading_newline
35+
36+
# If the child ends with "token" in the kind, it's considered
37+
# a token node. Grab the existing reference to that token from the
38+
# global list.
39+
self.token_kind = \
40+
self.syntax_kind if self.syntax_kind.endswith("Token") else None
41+
self.token = SYNTAX_TOKEN_MAP.get(self.token_kind)
42+
43+
self.is_optional = is_optional
44+
45+
# A restricted set of token kinds that will be accepted for this
46+
# child.
47+
self.token_choices = []
48+
if self.token:
49+
self.token_choices.append(self.token)
50+
for choice in token_choices or []:
51+
token = SYNTAX_TOKEN_MAP[choice]
52+
self.token_choices.append(token)
53+
54+
# A list of valid text for tokens, if specified.
55+
# This will force validation logic to check the text passed into the
56+
# token against the choices.
57+
self.text_choices = text_choices or []
58+
59+
# A list of valid choices for a child
60+
self.node_choices = node_choices or []
61+
62+
# Check the choices are either empty or multiple
63+
assert len(self.node_choices) != 1
64+
65+
# Check node choices are well-formed
66+
for choice in self.node_choices:
67+
assert not choice.is_optional, \
68+
"node choice %s cannot be optional" % choice.name
69+
assert not choice.node_choices, \
70+
"node choice %s cannot have further choices" % choice.name
71+
72+
def is_token(self):
73+
"""
74+
Returns true if this child has a token kind.
75+
"""
76+
return self.token_kind is not None
77+
78+
def main_token(self):
79+
"""
80+
Returns the first choice from the token_choices if there are any,
81+
otherwise returns None.
82+
"""
83+
if self.token_choices:
84+
return self.token_choices[0]
85+
return None
86+
87+
def is_unexpected_nodes(self):
88+
return self.syntax_kind == 'UnexpectedNodes'
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from .Utils import error
2+
from .kinds import lowercase_first_word # noqa: I201
3+
4+
5+
class SyntaxClassification(object):
    """
    A category that syntax highlighting can assign to a token.
    """

    def __init__(self, name, description):
        # `swift_name` is `name` passed through lowercase_first_word; the
        # other two attributes are stored unchanged.
        self.description = description
        self.swift_name = lowercase_first_word(name)
        self.name = name
14+
15+
16+
# Every classification a token can receive for syntax highlighting.
# `classification_by_name` searches this list by `name`.
SYNTAX_CLASSIFICATIONS = [
    SyntaxClassification('None', description='''
    The token should not receive syntax coloring.
    '''),
    SyntaxClassification('Keyword', description='''
    A Swift keyword, including contextual keywords.
    '''),
    SyntaxClassification('Identifier', description='''
    A generic identifier.
    '''),
    SyntaxClassification('TypeIdentifier', description='''
    An identifier referring to a type.
    '''),
    SyntaxClassification('DollarIdentifier', description='''
    An identifier starting with `$` like `$0`.
    '''),
    SyntaxClassification('IntegerLiteral', description='''
    An integer literal.
    '''),
    SyntaxClassification('FloatingLiteral', description='''
    A floating point literal.
    '''),
    SyntaxClassification('StringLiteral', description='''
    A string literal including multiline string literals.
    '''),
    SyntaxClassification('StringInterpolationAnchor', description='''
    The opening and closing parenthesis of string interpolation.
    '''),
    SyntaxClassification('PoundDirectiveKeyword', description='''
    A `#` keyword like `#warning`.
    '''),
    SyntaxClassification('BuildConfigId', description='''
    A build configuration directive like `#if`, `#elseif`, `#else`.
    '''),
    SyntaxClassification('Attribute', description='''
    An attribute starting with an `@`.
    '''),
    SyntaxClassification('ObjectLiteral', description='''
    An image, color, etc. literal.
    '''),
    SyntaxClassification('EditorPlaceholder', description='''
    An editor placeholder of the form `<#content#>`
    '''),
    SyntaxClassification('LineComment', description='''
    A line comment starting with `//`.
    '''),
    SyntaxClassification('DocLineComment', description='''
    A doc line comment starting with `///`.
    '''),
    # A plain block comment opens with `/*`; `/**` is the doc variant below.
    SyntaxClassification('BlockComment', description='''
    A block comment starting with `/*` and ending with `*/`.
    '''),
    SyntaxClassification('DocBlockComment', description='''
    A doc block comment starting with `/**` and ending with `*/`.
    '''),
]
72+
73+
74+
def classification_by_name(classification_name):
    """
    Look up the entry of SYNTAX_CLASSIFICATIONS whose `name` equals
    `classification_name`.

    A `classification_name` of None yields None without any lookup. A name
    that matches no entry is reported through `error`.
    """
    if classification_name is not None:
        found = next(
            (entry for entry in SYNTAX_CLASSIFICATIONS
             if entry.name == classification_name),
            None)
        if found is not None:
            return found
        error("Unknown syntax classification '%s'" % classification_name)
    return None
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from .Child import Child
2+
from .Node import Node # noqa: I201
3+
4+
# Node definitions that are not specific to one area of the grammar: the
# base kinds, their `Unknown*` / `Missing*` variants, code blocks and the
# collection of unexpected nodes.
COMMON_NODES = [
    # Base kinds. The entries below use these names as their `kind`.
    Node('Decl', kind='Syntax'),
    Node('Expr', kind='Syntax'),
    Node('Stmt', kind='Syntax'),
    Node('Type', kind='Syntax'),
    Node('Pattern', kind='Syntax'),
    # One `Unknown*` node per base kind.
    Node('UnknownDecl', kind='Decl'),
    Node('UnknownExpr', kind='Expr'),
    Node('UnknownStmt', kind='Stmt'),
    Node('UnknownType', kind='Type'),
    Node('UnknownPattern', kind='Pattern'),
    # `Missing` plus one `Missing*` node per base kind. Only `MissingDecl`
    # declares children (optional attribute and modifier lists).
    Node('Missing', kind='Syntax'),
    Node('MissingDecl', kind='Decl', children=[
        Child('Attributes', kind='AttributeList',
              collection_element_name='Attribute', is_optional=True),
        Child('Modifiers', kind='ModifierList',
              collection_element_name='Modifier', is_optional=True),
    ]),
    Node('MissingExpr', kind='Expr'),
    Node('MissingStmt', kind='Stmt'),
    Node('MissingType', kind='Type'),
    Node('MissingPattern', kind='Pattern'),

    # code-block-item = (decl | stmt | expr) ';'?
    # The `Item` child below additionally lists `TokenList` and
    # `NonEmptyTokenList` among its node choices.
    Node('CodeBlockItem', kind='Syntax', omit_when_empty=True,
         description="""
         A CodeBlockItem is any Syntax node that appears on its own line inside
         a CodeBlock.
         """,
         children=[
             Child('Item', kind='Syntax',
                   description="The underlying node inside the code block.",
                   node_choices=[
                       Child('Decl', kind='Decl'),
                       Child('Stmt', kind='Stmt'),
                       Child('Expr', kind='Expr'),
                       Child('TokenList', kind='TokenList'),
                       Child('NonEmptyTokenList', kind='NonEmptyTokenList'),
                   ]),
             Child('Semicolon', kind='SemicolonToken',
                   description="""
                   If present, the trailing semicolon at the end of the item.
                   """,
                   is_optional=True),
             Child('ErrorTokens', kind='Syntax', is_optional=True),
         ]),

    # code-block-item-list -> code-block-item code-block-item-list?
    Node('CodeBlockItemList', kind='SyntaxCollection',
         element='CodeBlockItem', elements_separated_by_newline=True),

    # code-block -> '{' stmt-list '}'
    Node('CodeBlock', kind='Syntax',
         traits=['Braced', 'WithStatements'],
         children=[
             Child('LeftBrace', kind='LeftBraceToken'),
             Child('Statements', kind='CodeBlockItemList',
                   collection_element_name='Statement', is_indented=True),
             Child('RightBrace', kind='RightBraceToken',
                   requires_leading_newline=True),
         ]),

    # Collection whose element kind is the generic `Syntax`.
    Node('UnexpectedNodes', kind='SyntaxCollection', element='Syntax',
         description='''
         A collection of syntax nodes that occurred in the source code but
         could not be used to form a valid syntax tree.
         '''),
]

0 commit comments

Comments
 (0)