Skip to content

Commit 0e2b31e

Browse files
authored
Merge pull request #597 from rintaro/rawsyntax-parsedtoken
Introduce `RawSyntaxData.ParsedToken`
2 parents 6144666 + b68716f commit 0e2b31e

File tree

9 files changed

+212
-28
lines changed

9 files changed

+212
-28
lines changed

Sources/SwiftSyntax/RawSyntax.swift

Lines changed: 100 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,28 @@ typealias RawTriviaPieceBuffer = UnsafeBufferPointer<RawTriviaPiece>
1616
/// Node data for RawSyntax tree. Tagged union plus common data.
1717
internal struct RawSyntaxData {
1818
internal enum Payload {
19+
case parsedToken(ParsedToken)
1920
case materializedToken(MaterializedToken)
2021
case layout(Layout)
2122
}
2223

24+
/// Token with lazy trivia parsing.
25+
///
26+
/// The RawSyntax's `arena` must have a valid trivia parsing function to
27+
/// lazily materialize the leading/trailing trivia pieces.
28+
struct ParsedToken {
29+
var tokenKind: RawTokenKind
30+
31+
/// Whole text of this token including leading/trailing trivia.
32+
var wholeText: SyntaxText
33+
34+
/// Range of the actual token’s text.
35+
///
36+
/// Text in `wholeText` before `textRange.lowerBound` is leading trivia and
37+
/// after `textRange.upperBound` is trailing trivia.
38+
var textRange: Range<SyntaxText.Index>
39+
}
40+
2341
/// Token typically created with `TokenSyntax.<someToken>`.
2442
struct MaterializedToken {
2543
var tokenKind: RawTokenKind
@@ -42,6 +60,18 @@ internal struct RawSyntaxData {
4260
fileprivate var arenaReference: SyntaxArenaRef
4361
}
4462

63+
extension RawSyntaxData.ParsedToken {
64+
var tokenText: SyntaxText {
65+
SyntaxText(rebasing: wholeText[textRange])
66+
}
67+
var leadingTriviaText: SyntaxText {
68+
SyntaxText(rebasing: wholeText[..<textRange.lowerBound])
69+
}
70+
var trailingTriviaText: SyntaxText {
71+
SyntaxText(rebasing: wholeText[textRange.upperBound...])
72+
}
73+
}
74+
4575
extension RawSyntaxData.MaterializedToken {
4676
var leadingTrivia: RawTriviaPieceBuffer {
4777
RawTriviaPieceBuffer(rebasing: triviaPieces[..<Int(numLeadingTrivia)])
@@ -94,6 +124,8 @@ extension RawSyntax {
94124
switch rawData.payload {
95125
case .materializedToken(let dat):
96126
return dat.tokenKind
127+
case .parsedToken(let dat):
128+
return dat.tokenKind
97129
case .layout(_):
98130
preconditionFailure("'tokenKind' is not available for non-token node")
99131
}
@@ -102,6 +134,8 @@ extension RawSyntax {
102134
/// Token text of this node assuming this node is a token.
103135
var rawTokenText: SyntaxText {
104136
switch rawData.payload {
137+
case .parsedToken(let dat):
138+
return dat.tokenText
105139
case .materializedToken(let dat):
106140
return dat.tokenText
107141
case .layout(_):
@@ -112,6 +146,8 @@ extension RawSyntax {
112146
/// The UTF-8 byte length of the leading trivia, assuming this node is a token.
113147
var tokenLeadingTriviaByteLength: Int {
114148
switch rawData.payload {
149+
case .parsedToken(let dat):
150+
return dat.leadingTriviaText.count
115151
case .materializedToken(let dat):
116152
return dat.leadingTrivia.reduce(0) { $0 + $1.byteLength }
117153
case .layout(_):
@@ -122,26 +158,32 @@ extension RawSyntax {
122158
/// The UTF-8 byte length of the trailing trivia, assuming this node is a token.
123159
var tokenTrailingTriviaByteLength: Int {
124160
switch rawData.payload {
161+
case .parsedToken(let dat):
162+
return dat.trailingTriviaText.count
125163
case .materializedToken(let dat):
126164
return dat.trailingTrivia.reduce(0) { $0 + $1.byteLength }
127165
case .layout(_):
128166
preconditionFailure("'tokenTrailingTriviaByteLength' is not available for non-token node")
129167
}
130168
}
131169

132-
var tokenLeadingRawTriviaPieces: RawTriviaPieceBuffer {
170+
var tokenLeadingRawTriviaPieces: [RawTriviaPiece] {
133171
switch rawData.payload {
172+
case .parsedToken(let dat):
173+
return self.arena.parseTrivia(source: dat.leadingTriviaText, position: .leading)
134174
case .materializedToken(let dat):
135-
return dat.leadingTrivia
175+
return Array(dat.leadingTrivia)
136176
case .layout(_):
137177
preconditionFailure("'tokenLeadingRawTriviaPieces' is called on non-token raw syntax")
138178
}
139179
}
140180

141-
var tokenTrailingRawTriviaPieces: RawTriviaPieceBuffer {
181+
var tokenTrailingRawTriviaPieces: [RawTriviaPiece] {
142182
switch rawData.payload {
183+
case .parsedToken(let dat):
184+
return self.arena.parseTrivia(source: dat.trailingTriviaText, position: .trailing)
143185
case .materializedToken(let dat):
144-
return dat.trailingTrivia
186+
return Array(dat.trailingTrivia)
145187
case .layout(_):
146188
preconditionFailure("'tokenTrailingRawTriviaPieces' is called on non-token raw syntax")
147189
}
@@ -152,6 +194,7 @@ extension RawSyntax {
152194
/// The syntax kind of this raw syntax.
153195
var kind: SyntaxKind {
154196
switch rawData.payload {
197+
case .parsedToken(_): return .token
155198
case .materializedToken(_): return .token
156199
case .layout(let dat): return dat.kind
157200
}
@@ -175,8 +218,11 @@ extension RawSyntax {
175218
/// Child nodes.
176219
var children: RawSyntaxBuffer {
177220
switch rawData.payload {
178-
case .materializedToken(_): return .init(start: nil, count: 0)
179-
case .layout(let dat): return dat.layout
221+
case .parsedToken(_),
222+
.materializedToken(_):
223+
return .init(start: nil, count: 0)
224+
case .layout(let dat):
225+
return dat.layout
180226
}
181227
}
182228

@@ -197,8 +243,11 @@ extension RawSyntax {
197243
/// Total number of nodes in this sub-tree, including `self` node.
198244
var totalNodes: Int {
199245
switch rawData.payload {
200-
case .materializedToken(_): return 1
201-
case .layout(let dat): return dat.descendantCount + 1
246+
case .parsedToken(_),
247+
.materializedToken(_):
248+
return 1
249+
case .layout(let dat):
250+
return dat.descendantCount + 1
202251
}
203252
}
204253

@@ -226,6 +275,7 @@ extension RawSyntax {
226275
/// Sum of text byte lengths of all descendant token nodes.
227276
var byteLength: Int {
228277
switch rawData.payload {
278+
case .parsedToken(let dat): return dat.wholeText.count
229279
case .materializedToken(let dat): return Int(dat.byteLength)
230280
case .layout(let dat): return dat.byteLength
231281
}
@@ -237,6 +287,8 @@ extension RawSyntax {
237287

238288
func formTokenKind() -> TokenKind? {
239289
switch rawData.payload {
290+
case .parsedToken(let dat):
291+
return TokenKind.fromRaw(kind: dat.tokenKind, text: dat.tokenText)
240292
case .materializedToken(let dat):
241293
return TokenKind.fromRaw(kind: dat.tokenKind, text: dat.tokenText)
242294
case .layout(_):
@@ -255,13 +307,13 @@ extension RawSyntax {
255307
}
256308

257309
/// Returns the leading `Trivia`, assuming this node is a token.
258-
func formTokenLeadingTrivia() -> Trivia? {
310+
func formTokenLeadingTrivia() -> Trivia {
259311
return Trivia(pieces: tokenLeadingRawTriviaPieces.map({ TriviaPiece(raw: $0) }))
260312
}
261313

262314
/// Returns the trailing `Trivia`, assuming this node is a token.
263315
/// - Precondition: Must not be called on a layout node; doing so traps.
264-
func formTokenTrailingTrivia() -> Trivia? {
316+
func formTokenTrailingTrivia() -> Trivia {
265317
return Trivia(pieces: tokenTrailingRawTriviaPieces.map({ TriviaPiece(raw: $0) }))
266318
}
267319

@@ -280,7 +332,14 @@ extension RawSyntax {
280332
/// Assuming this node is a token, returns a `RawSyntax` node with the same
281333
/// source text but with the token kind changed to `newValue`.
282334
func withTokenKind(_ newValue: TokenKind) -> RawSyntax {
283-
switch payload {
335+
switch rawData.payload {
336+
case .parsedToken(_):
337+
// The wholeText can't be continuous anymore. Make a materialized token.
338+
return .makeMaterializedToken(
339+
kind: newValue,
340+
leadingTrivia: formTokenLeadingTrivia(),
341+
trailingTrivia: formTokenTrailingTrivia(),
342+
arena: arena)
284343
case .materializedToken(var payload):
285344
let decomposed = newValue.decomposeToRaw()
286345
let rawKind = decomposed.rawKind
@@ -304,7 +363,7 @@ extension RawSyntax {
304363
return .makeMaterializedToken(
305364
kind: formTokenKind()!,
306365
leadingTrivia: leadingTrivia,
307-
trailingTrivia: formTokenTrailingTrivia()!,
366+
trailingTrivia: formTokenTrailingTrivia(),
308367
arena: arena)
309368
}
310369

@@ -324,7 +383,7 @@ extension RawSyntax {
324383
if isToken {
325384
return .makeMaterializedToken(
326385
kind: formTokenKind()!,
327-
leadingTrivia: formTokenLeadingTrivia()!,
386+
leadingTrivia: formTokenLeadingTrivia(),
328387
trailingTrivia: trailingTrivia,
329388
arena: arena)
330389
}
@@ -517,6 +576,9 @@ extension RawSyntax: TextOutputStreamable, CustomStringConvertible {
517576
/// - Parameter stream: The stream on which to output this node.
518577
public func write<Target: TextOutputStream>(to target: inout Target) {
519578
switch rawData.payload {
579+
case .parsedToken(let dat):
580+
String(syntaxText: dat.wholeText).write(to: &target)
581+
break
520582
case .materializedToken(let dat):
521583
for p in dat.leadingTrivia { p.write(to: &target) }
522584
String(syntaxText: dat.tokenText).write(to: &target)
@@ -594,6 +656,8 @@ extension RawSyntax {
594656
/// is a token node.
595657
var tokenTextByteLength: Int {
596658
switch rawData.payload {
659+
case .parsedToken(let dat):
660+
return dat.tokenText.count
597661
case .materializedToken(let dat):
598662
return dat.tokenText.count
599663
case .layout(_):
@@ -640,6 +704,24 @@ private func makeRawTriviaPieces(leadingTrivia: Trivia, trailingTrivia: Trivia,
640704
}
641705

642706
extension RawSyntax {
707+
/// "Designated" factory method to create a parsed token node.
708+
///
709+
/// - Parameters:
710+
/// - kind: Token kind.
711+
/// - wholeText: Whole text of this token including trailing/leading trivia.
712+
/// - textRange: Range of the token text in `wholeText`.
713+
/// - arena: SyntaxArena in which the result node data resides.
714+
internal static func parsedToken(
715+
kind: RawTokenKind,
716+
wholeText: SyntaxText,
717+
textRange: Range<SyntaxText.Index>,
718+
arena: SyntaxArena
719+
) -> RawSyntax {
720+
let payload = RawSyntaxData.ParsedToken(
721+
tokenKind: kind, wholeText: wholeText, textRange: textRange)
722+
return RawSyntax(arena: arena, payload: .parsedToken(payload))
723+
}
724+
643725
/// "Designated" factory method to create a materialized token node.
644726
///
645727
/// This should not be called directly.
@@ -864,6 +946,11 @@ extension RawSyntax: CustomDebugStringConvertible {
864946
private func debugWrite<Target: TextOutputStream>(to target: inout Target, indent: Int, withChildren: Bool = false) {
865947
let childIndent = indent + 2
866948
switch rawData.payload {
949+
case .parsedToken(let dat):
950+
target.write(".parsedToken(")
951+
target.write(String(describing: dat.tokenKind))
952+
target.write(" wholeText=\(dat.tokenText.debugDescription)")
953+
target.write(" textRange=\(dat.textRange.description)")
867954
case .materializedToken(let dat):
868955
target.write(".materializedToken(")
869956
target.write(String(describing: dat.tokenKind))

Sources/SwiftSyntax/RawSyntaxNodeProtocol.swift

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ public struct RawTokenSyntax: RawSyntaxNodeProtocol {
8686
return raw.rawTokenText
8787
}
8888

89+
public var byteLength: Int {
90+
return raw.byteLength
91+
}
92+
8993
public var presence: SourcePresence {
9094
raw.presence
9195
}
@@ -94,7 +98,30 @@ public struct RawTokenSyntax: RawSyntaxNodeProtocol {
9498
presence == .missing
9599
}
96100

97-
/// Creates a `TokenSyntax`. `text` and trivia must be managed by the same
101+
public var leadingTriviaPieces: [RawTriviaPiece] {
102+
raw.tokenLeadingRawTriviaPieces
103+
}
104+
105+
public var trailingTriviaPieces: [RawTriviaPiece] {
106+
raw.tokenTrailingRawTriviaPieces
107+
}
108+
109+
/// Creates a `RawTokenSyntax`. `wholeText` must be managed by the same
110+
/// `arena`. `textRange` is a range of the token text in `wholeText`.
111+
public init(
112+
kind: RawTokenKind,
113+
wholeText: SyntaxText,
114+
textRange: Range<SyntaxText.Index>,
115+
arena: SyntaxArena
116+
) {
117+
assert(arena.contains(text: wholeText),
118+
"token text must be managed by the arena")
119+
let raw = RawSyntax.parsedToken(
120+
kind: kind, wholeText: wholeText, textRange: textRange, arena: arena)
121+
self = RawTokenSyntax(raw: raw)
122+
}
123+
124+
/// Creates a `RawTokenSyntax`. `text` and trivia must be managed by the same
98125
/// `arena`.
99126
public init(
100127
kind: RawTokenKind,

Sources/SwiftSyntax/Syntax.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,14 @@ public protocol SyntaxProtocol: CustomStringConvertible,
118118
var syntaxNodeType: SyntaxProtocol.Type { get }
119119
}
120120

121-
internal extension SyntaxProtocol {
121+
extension SyntaxProtocol {
122122
var data: SyntaxData {
123123
return _syntaxNode.data
124124
}
125125

126126
/// Access the raw syntax assuming the node is a Syntax.
127-
var raw: RawSyntax {
127+
@_spi(RawSyntax)
128+
public var raw: RawSyntax {
128129
return _syntaxNode.data.raw
129130
}
130131
}

Sources/SwiftSyntax/SyntaxArena.swift

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
public class SyntaxArena {
14+
15+
@_spi(RawSyntax)
16+
public typealias ParseTriviaFunction = (_ source: SyntaxText, _ position: TriviaPosition) -> [RawTriviaPiece]
17+
1418
/// Bump-pointer allocator for all "intern" methods.
1519
private var allocator: BumpPtrAllocator
1620
/// Source file buffer the Syntax tree represents.
@@ -22,12 +26,19 @@ public class SyntaxArena {
2226
/// Whether or not this arena has been added to other arenas as a child.
2327
/// Used to make sure we don’t introduce retain cycles between arenas.
2428
private var hasParent: Bool
29+
private var parseTriviaFunction: ParseTriviaFunction
2530

26-
public init() {
31+
@_spi(RawSyntax)
32+
public init(parseTriviaFunction: @escaping ParseTriviaFunction) {
2733
allocator = BumpPtrAllocator()
2834
children = []
2935
sourceBuffer = .init(start: nil, count: 0)
3036
hasParent = false
37+
self.parseTriviaFunction = parseTriviaFunction
38+
}
39+
40+
public convenience init() {
41+
self.init(parseTriviaFunction: _defaultParseTriviaFunction(_:_:))
3142
}
3243

3344
/// Copies a source buffer in to the memory this arena manages, and returns
@@ -145,6 +156,11 @@ public class SyntaxArena {
145156
sourceBufferContains(text.baseAddress!) ||
146157
allocator.contains(address: text.baseAddress!))
147158
}
159+
160+
@_spi(RawSyntax)
161+
public func parseTrivia(source: SyntaxText, position: TriviaPosition) -> [RawTriviaPiece] {
162+
return self.parseTriviaFunction(source, position)
163+
}
148164
}
149165

150166
extension SyntaxArena: Hashable {
@@ -184,3 +200,7 @@ extension SyntaxArena {
184200
//@available(*, deprecated, message: ".default SyntaxArena is subject to remove soon")
185201
public static let `default` = SyntaxArena()
186202
}
203+
204+
private func _defaultParseTriviaFunction(_ source: SyntaxText, _ position: TriviaPosition) -> [RawTriviaPiece] {
205+
preconditionFailure("Trivia parsing not supported")
206+
}

Sources/SwiftSyntax/SyntaxClassifier.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,9 @@ fileprivate struct FastTokenSequence: Sequence {
276276
/// Provides a sequence of `SyntaxClassifiedRange`s for a token.
277277
fileprivate struct TokenClassificationIterator: IteratorProtocol {
278278
enum State {
279-
case atLeadingTrivia(RawTriviaPieceBuffer, Int)
279+
case atLeadingTrivia([RawTriviaPiece], Int)
280280
case atTokenText
281-
case atTrailingTrivia(RawTriviaPieceBuffer, Int)
281+
case atTrailingTrivia([RawTriviaPiece], Int)
282282
}
283283

284284
let token: AbsoluteNode

0 commit comments

Comments
 (0)