Skip to content

Commit 80c0331

Browse files
committed
Introduce RawSyntaxData.ParsedToken
A new RawSyntax data variation. This is intended to be used for parsed tokens. Actual usage will be in follow-ups. In this variation, unlike the existing `MaterializedToken`, leading/trailing trivia are stored in a SyntaxText form, and are lazily materialized by a function registered in `SyntaxArena`. Since the parser doesn't need to materialize them while parsing, it'd gain the "parsing" performance boost, in exchange for a performance reduction when accessing trivia.
1 parent 23d38a1 commit 80c0331

File tree

6 files changed

+156
-20
lines changed

6 files changed

+156
-20
lines changed

Sources/SwiftSyntax/RawSyntax.swift

Lines changed: 88 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,21 @@ typealias RawTriviaPieceBuffer = UnsafeBufferPointer<RawTriviaPiece>
1616
/// Node data for RawSyntax tree. Tagged union plus common data.
1717
internal struct RawSyntaxData {
1818
internal enum Payload {
19+
case parsedToken(ParsedToken)
1920
case materializedToken(MaterializedToken)
2021
case layout(Layout)
2122
}
2223

24+
/// Token with lazy trivia parsing.
25+
///
26+
/// The RawSyntax's `arena` must have a valid trivia parsing function to
27+
/// lazily materialize the leading/trailing trivia.
28+
struct ParsedToken {
29+
/// Raw kind of this token.
var tokenKind: RawTokenKind
30+
/// Text of the token together with its leading and trailing trivia.
var wholeText: SyntaxText
31+
/// Range within `wholeText` that holds the token text itself. The text
/// before the range is the leading trivia; the text after it is the
/// trailing trivia.
var textRange: Range<SyntaxText.Index>
32+
}
33+
2334
/// Token typically created with `TokenSyntax.<someToken>`.
2435
struct MaterializedToken {
2536
var tokenKind: RawTokenKind
@@ -42,6 +53,18 @@ internal struct RawSyntaxData {
4253
fileprivate var arenaReference: SyntaxArenaRef
4354
}
4455

56+
/// Slices of `wholeText` derived from `textRange`.
extension RawSyntaxData.ParsedToken {
57+
/// The token text without trivia: the part of `wholeText` covered by
/// `textRange`.
var tokenText: SyntaxText {
58+
SyntaxText(rebasing: wholeText[textRange])
59+
}
60+
/// The unparsed leading trivia: everything in `wholeText` before
/// `textRange.lowerBound`.
var leadingTriviaText: SyntaxText {
61+
SyntaxText(rebasing: wholeText[..<textRange.lowerBound])
62+
}
63+
/// The unparsed trailing trivia: everything in `wholeText` from
/// `textRange.upperBound` to the end.
var trailingTriviaText: SyntaxText {
64+
SyntaxText(rebasing: wholeText[textRange.upperBound...])
65+
}
66+
}
67+
4568
extension RawSyntaxData.MaterializedToken {
4669
var leadingTrivia: RawTriviaPieceBuffer {
4770
RawTriviaPieceBuffer(rebasing: triviaPieces[..<Int(numLeadingTrivia)])
@@ -94,6 +117,8 @@ extension RawSyntax {
94117
switch rawData.payload {
95118
case .materializedToken(let dat):
96119
return dat.tokenKind
120+
case .parsedToken(let dat):
121+
return dat.tokenKind
97122
case .layout(_):
98123
preconditionFailure("'tokenKind' is not available for non-token node")
99124
}
@@ -102,6 +127,8 @@ extension RawSyntax {
102127
/// Token text of this node assuming this node is a token.
103128
var rawTokenText: SyntaxText {
104129
switch rawData.payload {
130+
case .parsedToken(let dat):
131+
return dat.tokenText
105132
case .materializedToken(let dat):
106133
return dat.tokenText
107134
case .layout(_):
@@ -112,6 +139,8 @@ extension RawSyntax {
112139
/// The UTF-8 byte length of the leading trivia, assuming this node is a token.
113140
var tokenLeadingTriviaByteLength: Int {
114141
switch rawData.payload {
142+
case .parsedToken(let dat):
143+
return dat.leadingTriviaText.count
115144
case .materializedToken(let dat):
116145
return dat.leadingTrivia.reduce(0) { $0 + $1.byteLength }
117146
case .layout(_):
@@ -122,26 +151,32 @@ extension RawSyntax {
122151
/// The UTF-8 byte length of the trailing trivia, assuming this node is a token.
123152
var tokenTrailingTriviaByteLength: Int {
124153
switch rawData.payload {
154+
case .parsedToken(let dat):
155+
return dat.trailingTriviaText.count
125156
case .materializedToken(let dat):
126157
return dat.trailingTrivia.reduce(0) { $0 + $1.byteLength }
127158
case .layout(_):
128159
preconditionFailure("'tokenTrailingTriviaByteLength' is not available for non-token node")
129160
}
130161
}
131162

132-
var tokenLeadingRawTriviaPieces: RawTriviaPieceBuffer {
163+
/// The leading trivia pieces of this node, assuming this node is a token.
///
/// For a parsed token the pieces are produced on every access by the arena's
/// trivia parsing function from the leading trivia text. For a materialized
/// token the stored pieces are copied into a new array. Calling this on a
/// layout node is a precondition failure.
var tokenLeadingRawTriviaPieces: [RawTriviaPiece] {
133164
switch rawData.payload {
165+
case .parsedToken(let dat):
166+
return self.arena.parseTrivia(source: dat.leadingTriviaText, isTrailing: false)
134167
case .materializedToken(let dat):
135-
return dat.leadingTrivia
168+
return Array(dat.leadingTrivia)
136169
case .layout(_):
137170
preconditionFailure("'tokenLeadingRawTriviaPieces' is called on non-token raw syntax")
138171
}
139172
}
140173

141-
var tokenTrailingRawTriviaPieces: RawTriviaPieceBuffer {
174+
var tokenTrailingRawTriviaPieces: [RawTriviaPiece] {
142175
switch rawData.payload {
176+
case .parsedToken(let dat):
177+
// This is the trailing trivia text, so the arena's trivia parser must be
// told so (`isTrailing: true`); the leading accessor is the one that passes `false`.
return self.arena.parseTrivia(source: dat.trailingTriviaText, isTrailing: true)
143178
case .materializedToken(let dat):
144-
return dat.trailingTrivia
179+
return Array(dat.trailingTrivia)
145180
case .layout(_):
146181
preconditionFailure("'tokenTrailingRawTriviaPieces' is called on non-token raw syntax")
147182
}
@@ -152,6 +187,7 @@ extension RawSyntax {
152187
/// The syntax kind of this raw syntax.
153188
var kind: SyntaxKind {
154189
switch rawData.payload {
190+
case .parsedToken(_): return .token
155191
case .materializedToken(_): return .token
156192
case .layout(let dat): return dat.kind
157193
}
@@ -175,8 +211,11 @@ extension RawSyntax {
175211
/// Child nodes.
176212
var children: RawSyntaxBuffer {
177213
switch rawData.payload {
178-
case .materializedToken(_): return .init(start: nil, count: 0)
179-
case .layout(let dat): return dat.layout
214+
case .parsedToken(_),
215+
.materializedToken(_):
216+
return .init(start: nil, count: 0)
217+
case .layout(let dat):
218+
return dat.layout
180219
}
181220
}
182221

@@ -197,8 +236,11 @@ extension RawSyntax {
197236
/// Total number of nodes in this sub-tree, including `self` node.
198237
var totalNodes: Int {
199238
switch rawData.payload {
200-
case .materializedToken(_): return 1
201-
case .layout(let dat): return dat.descendantCount + 1
239+
case .parsedToken(_),
240+
.materializedToken(_):
241+
return 1
242+
case .layout(let dat):
243+
return dat.descendantCount + 1
202244
}
203245
}
204246

@@ -226,6 +268,7 @@ extension RawSyntax {
226268
/// Sum of text byte lengths of all descendant token nodes.
227269
var byteLength: Int {
228270
switch rawData.payload {
271+
case .parsedToken(let dat): return dat.wholeText.count
229272
case .materializedToken(let dat): return Int(dat.byteLength)
230273
case .layout(let dat): return dat.byteLength
231274
}
@@ -237,6 +280,8 @@ extension RawSyntax {
237280

238281
func formTokenKind() -> TokenKind? {
239282
switch rawData.payload {
283+
case .parsedToken(let dat):
284+
return TokenKind.fromRaw(kind: dat.tokenKind, text: dat.tokenText)
240285
case .materializedToken(let dat):
241286
return TokenKind.fromRaw(kind: dat.tokenKind, text: dat.tokenText)
242287
case .layout(_):
@@ -280,7 +325,13 @@ extension RawSyntax {
280325
/// Assuming this node is a token, returns a `RawSyntax` node with the same
281326
/// source text but with the token kind changed to `newValue`.
282327
func withTokenKind(_ newValue: TokenKind) -> RawSyntax {
283-
switch payload {
328+
switch rawData.payload {
329+
case .parsedToken(_):
330+
return .makeMaterializedToken(
331+
kind: newValue,
332+
leadingTrivia: formLeadingTrivia()!,
333+
trailingTrivia: formTokenTrailingTrivia()!,
334+
arena: arena)
284335
case .materializedToken(var payload):
285336
let decomposed = newValue.decomposeToRaw()
286337
let rawKind = decomposed.rawKind
@@ -517,6 +568,9 @@ extension RawSyntax: TextOutputStreamable, CustomStringConvertible {
517568
/// - Parameter stream: The stream on which to output this node.
518569
public func write<Target: TextOutputStream>(to target: inout Target) {
519570
switch rawData.payload {
571+
case .parsedToken(let dat):
572+
String(syntaxText: dat.wholeText).write(to: &target)
573+
break
520574
case .materializedToken(let dat):
521575
for p in dat.leadingTrivia { p.write(to: &target) }
522576
String(syntaxText: dat.tokenText).write(to: &target)
@@ -594,6 +648,8 @@ extension RawSyntax {
594648
/// is a token node.
595649
var tokenTextByteLength: Int {
596650
switch rawData.payload {
651+
case .parsedToken(let dat):
652+
return dat.tokenText.count
597653
case .materializedToken(let dat):
598654
return dat.tokenText.count
599655
case .layout(_):
@@ -640,6 +696,24 @@ private func makeRawTriviaPieces(leadingTrivia: Trivia, trailingTrivia: Trivia,
640696
}
641697

642698
extension RawSyntax {
699+
/// "Designated" factory method to create a parsed token node.
700+
///
701+
/// - Parameters:
702+
///   - kind: Token kind.
703+
///   - wholeText: Whole text of this token including trailing/leading trivia.
704+
///   - textRange: Range of the token text in `wholeText`.
705+
///   - arena: SyntaxArena in which the result node data resides.
/// - Returns: A `RawSyntax` node whose payload is `.parsedToken`.
706+
internal static func parsedToken(
707+
kind: RawTokenKind,
708+
wholeText: SyntaxText,
709+
textRange: Range<SyntaxText.Index>,
710+
arena: SyntaxArena
711+
) -> RawSyntax {
712+
let payload = RawSyntaxData.ParsedToken(
713+
tokenKind: kind, wholeText: wholeText, textRange: textRange)
714+
return RawSyntax(arena: arena, payload: .parsedToken(payload))
715+
}
716+
643717
/// "Designated" factory method to create a materialized token node.
644718
///
645719
/// This should not be called directly.
@@ -864,6 +938,11 @@ extension RawSyntax: CustomDebugStringConvertible {
864938
private func debugWrite<Target: TextOutputStream>(to target: inout Target, indent: Int, withChildren: Bool = false) {
865939
let childIndent = indent + 2
866940
switch rawData.payload {
941+
case .parsedToken(let dat):
942+
target.write(".parsedToken(")
943+
target.write(String(describing: dat.tokenKind))
944+
// Print the whole text (trivia included) so the label matches the value and
// the `textRange` printed next can be read as a range into it.
target.write(" wholeText=\(dat.wholeText.debugDescription)")
945+
target.write(" textRange=\(dat.textRange.description)")
867946
case .materializedToken(let dat):
868947
target.write(".materializedToken(")
869948
target.write(String(describing: dat.tokenKind))

Sources/SwiftSyntax/RawSyntaxNodeProtocol.swift

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ public struct RawTokenSyntax: RawSyntaxNodeProtocol {
8686
return raw.rawTokenText
8787
}
8888

89+
/// The byte length of this token, as reported by the underlying raw node.
public var byteLength: Int {
90+
return raw.byteLength
91+
}
92+
8993
public var presence: SourcePresence {
9094
raw.presence
9195
}
@@ -94,7 +98,30 @@ public struct RawTokenSyntax: RawSyntaxNodeProtocol {
9498
presence == .missing
9599
}
96100

97-
/// Creates a `TokenSyntax`. `text` and trivia must be managed by the same
101+
/// The leading trivia pieces of this token, forwarded from the underlying
/// raw node's `tokenLeadingRawTriviaPieces`.
public var leadingTriviaPieces: [RawTriviaPiece] {
102+
raw.tokenLeadingRawTriviaPieces
103+
}
104+
105+
/// The trailing trivia pieces of this token, forwarded from the underlying
/// raw node's `tokenTrailingRawTriviaPieces`.
public var trailingTriviaPieces: [RawTriviaPiece] {
106+
raw.tokenTrailingRawTriviaPieces
107+
}
108+
109+
/// Creates a `RawTokenSyntax`. `wholeText` must be managed by the same
110+
/// `arena`. `textRange` is a range of the token text in `wholeText`.
111+
///
/// The resulting node is a parsed token: its trivia is kept as text and is
/// not split into pieces here.
///
/// - Parameters:
///   - kind: Raw kind of the token.
///   - wholeText: Token text including leading and trailing trivia.
///   - textRange: Range of the token text in `wholeText`.
///   - arena: Arena that manages `wholeText` and will own the new node.
public init(
112+
kind: RawTokenKind,
113+
wholeText: SyntaxText,
114+
textRange: Range<SyntaxText.Index>,
115+
arena: SyntaxArena
116+
) {
117+
// `assert`, not `precondition`: the ownership check is only performed in
// builds where assertions are enabled.
assert(arena.contains(text: wholeText),
118+
"token text must be managed by the arena")
119+
let raw = RawSyntax.parsedToken(
120+
kind: kind, wholeText: wholeText, textRange: textRange, arena: arena)
121+
self = RawTokenSyntax(raw: raw)
122+
}
123+
124+
/// Creates a `RawTokenSyntax`. `text` and trivia must be managed by the same
98125
/// `arena`.
99126
public init(
100127
kind: RawTokenKind,

Sources/SwiftSyntax/SyntaxArena.swift

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
public class SyntaxArena {
14+
15+
/// Signature of the function an arena uses to turn trivia text into trivia
/// pieces. `source` is the trivia text to parse and `isTrailing` tells
/// whether that text is trailing (as opposed to leading) trivia.
@_spi(RawSyntax)
16+
public typealias ParseTriviaFunction = (_ source: SyntaxText, _ isTrailing: Bool) -> [RawTriviaPiece]
17+
1418
/// Bump-pointer allocator for all "intern" methods.
1519
private var allocator: BumpPtrAllocator
1620
/// Source file buffer the Syntax tree represents.
@@ -22,12 +26,19 @@ public class SyntaxArena {
2226
/// Whether or not this arena has been added to other arenas as a child.
2327
/// Used to make sure we don’t introduce retain cycles between arenas.
2428
private var hasParent: Bool
29+
private var parseTriviaFunction: ParseTriviaFunction
2530

26-
public init() {
31+
/// Creates an empty arena that uses `parseTriviaFunction` whenever
/// `parseTrivia(source:isTrailing:)` is called on it.
///
/// - Parameter parseTriviaFunction: Function stored by the arena and invoked
///   to parse trivia text into pieces.
@_spi(RawSyntax)
32+
public init(parseTriviaFunction: @escaping ParseTriviaFunction) {
2733
allocator = BumpPtrAllocator()
2834
children = []
2935
sourceBuffer = .init(start: nil, count: 0)
3036
hasParent = false
37+
self.parseTriviaFunction = parseTriviaFunction
38+
}
39+
40+
/// Creates an empty arena without trivia parsing support: the installed
/// default trivia parsing function fails with a precondition failure when
/// it is invoked.
public convenience init() {
41+
self.init(parseTriviaFunction: _defaultParseTriviaFunction(_:_:))
3142
}
3243

3344
/// Copies a source buffer in to the memory this arena manages, and returns
@@ -145,6 +156,11 @@ public class SyntaxArena {
145156
sourceBufferContains(text.baseAddress!) ||
146157
allocator.contains(address: text.baseAddress!))
147158
}
159+
160+
/// Parses `source` into trivia pieces using the trivia parsing function this
/// arena was created with.
///
/// - Parameters:
///   - source: Trivia text to parse.
///   - isTrailing: Whether `source` is trailing trivia; passed through to
///     the parsing function unchanged.
/// - Returns: The pieces returned by the parsing function.
@_spi(RawSyntax)
161+
public func parseTrivia(source: SyntaxText, isTrailing: Bool) -> [RawTriviaPiece] {
162+
return parseTriviaFunction(source, isTrailing)
163+
}
148164
}
149165

150166
extension SyntaxArena: Hashable {
@@ -184,3 +200,7 @@ extension SyntaxArena {
184200
//@available(*, deprecated, message: ".default SyntaxArena is subject to remove soon")
185201
public static let `default` = SyntaxArena()
186202
}
203+
204+
/// Trivia parsing function installed by `SyntaxArena.init()`.
///
/// It never returns: an arena created without an explicit trivia parsing
/// function cannot parse trivia text, so any attempt to do so is reported as
/// a precondition failure. Both parameters are ignored.
///
/// - Parameters:
///   - source: Trivia text that was requested to be parsed (unused).
///   - isTrailing: Whether `source` is trailing trivia (unused).
private func _defaultParseTriviaFunction(_ source: SyntaxText, _ isTrailing: Bool) -> [RawTriviaPiece] {
  preconditionFailure("Trivia parsing not supported")
}

Sources/SwiftSyntax/SyntaxClassifier.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,9 @@ fileprivate struct FastTokenSequence: Sequence {
276276
/// Provides a sequence of `SyntaxClassifiedRange`s for a token.
277277
fileprivate struct TokenClassificationIterator: IteratorProtocol {
278278
enum State {
279-
case atLeadingTrivia(RawTriviaPieceBuffer, Int)
279+
case atLeadingTrivia([RawTriviaPiece], Int)
280280
case atTokenText
281-
case atTrailingTrivia(RawTriviaPieceBuffer, Int)
281+
case atTrailingTrivia([RawTriviaPiece], Int)
282282
}
283283

284284
let token: AbsoluteNode

Sources/SwiftSyntax/Trivia.swift.gyb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ extension TriviaPiece: CustomDebugStringConvertible {
7070
return "${trivia.lower_name}s(\(data))"
7171
% else:
7272
case .${trivia.lower_name}(let name):
73-
return "${trivia.lower_name}(\(name))"
73+
return "${trivia.lower_name}(\(name.debugDescription))"
7474
% end
7575
% end
7676
}
@@ -263,6 +263,11 @@ extension RawTriviaPiece: TextOutputStreamable {
263263
TriviaPiece(raw: self).write(to: &target)
264264
}
265265
}
266+
extension RawTriviaPiece: CustomDebugStringConvertible {
267+
/// Debug description of this piece, obtained by converting it to a
/// `TriviaPiece` and using that value's `debugDescription`.
public var debugDescription: String {
268+
TriviaPiece(raw: self).debugDescription
269+
}
270+
}
266271

267272
extension TriviaPiece {
268273
init(raw: RawTriviaPiece) {

Sources/SwiftSyntax/gyb_generated/Trivia.swift

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,17 +108,17 @@ extension TriviaPiece: CustomDebugStringConvertible {
108108
case .carriageReturnLineFeeds(let data):
109109
return "carriageReturnLineFeeds(\(data))"
110110
case .lineComment(let name):
111-
return "lineComment(\(name))"
111+
return "lineComment(\(name.debugDescription))"
112112
case .blockComment(let name):
113-
return "blockComment(\(name))"
113+
return "blockComment(\(name.debugDescription))"
114114
case .docLineComment(let name):
115-
return "docLineComment(\(name))"
115+
return "docLineComment(\(name.debugDescription))"
116116
case .docBlockComment(let name):
117-
return "docBlockComment(\(name))"
117+
return "docBlockComment(\(name.debugDescription))"
118118
case .unexpectedText(let name):
119-
return "unexpectedText(\(name))"
119+
return "unexpectedText(\(name.debugDescription))"
120120
case .shebang(let name):
121-
return "shebang(\(name))"
121+
return "shebang(\(name.debugDescription))"
122122
}
123123
}
124124
}
@@ -415,6 +415,11 @@ extension RawTriviaPiece: TextOutputStreamable {
415415
TriviaPiece(raw: self).write(to: &target)
416416
}
417417
}
418+
extension RawTriviaPiece: CustomDebugStringConvertible {
419+
/// Debug description of this piece, obtained by converting it to a
/// `TriviaPiece` and using that value's `debugDescription`.
public var debugDescription: String {
420+
TriviaPiece(raw: self).debugDescription
421+
}
422+
}
418423

419424
extension TriviaPiece {
420425
init(raw: RawTriviaPiece) {

0 commit comments

Comments
 (0)