Skip to content

Introduce RawSyntaxData.ParsedToken #597

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 100 additions & 13 deletions Sources/SwiftSyntax/RawSyntax.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,28 @@ typealias RawTriviaPieceBuffer = UnsafeBufferPointer<RawTriviaPiece>
/// Node data for RawSyntax tree. Tagged union plus common data.
internal struct RawSyntaxData {
/// Kind-specific storage of a node: one of two token representations, or a
/// layout (non-token) node.
internal enum Payload {
/// Token with lazy trivia parsing; see `ParsedToken`.
case parsedToken(ParsedToken)
/// Token typically created with `TokenSyntax.<someToken>`; see `MaterializedToken`.
case materializedToken(MaterializedToken)
/// Non-token node; see `Layout`.
case layout(Layout)
}

/// Token with lazy trivia parsing.
///
/// The token, its leading trivia and its trailing trivia are stored together
/// as one text (`wholeText`); `textRange` marks where the token text itself
/// sits inside it.
///
/// The RawSyntax's `arena` must have a valid trivia parsing function to
/// lazily materialize the leading/trailing trivia pieces.
struct ParsedToken {
/// Raw kind of this token.
var tokenKind: RawTokenKind

/// Whole text of this token including leading/trailing trivia.
var wholeText: SyntaxText

/// Range of the actual token’s text.
///
/// Text in `wholeText` before `textRange.lowerBound` is leading trivia and
/// after `textRange.upperBound` is trailing trivia.
var textRange: Range<SyntaxText.Index>
}

/// Token typically created with `TokenSyntax.<someToken>`.
struct MaterializedToken {
var tokenKind: RawTokenKind
Expand All @@ -42,6 +60,18 @@ internal struct RawSyntaxData {
fileprivate var arenaReference: SyntaxArenaRef
}

extension RawSyntaxData.ParsedToken {
  /// Text of the token itself: the part of `wholeText` covered by `textRange`.
  var tokenText: SyntaxText {
    let tokenSlice = wholeText[textRange]
    return SyntaxText(rebasing: tokenSlice)
  }

  /// Unparsed leading trivia: the part of `wholeText` before `textRange`.
  var leadingTriviaText: SyntaxText {
    let leadingSlice = wholeText[..<textRange.lowerBound]
    return SyntaxText(rebasing: leadingSlice)
  }

  /// Unparsed trailing trivia: the part of `wholeText` after `textRange`.
  var trailingTriviaText: SyntaxText {
    let trailingSlice = wholeText[textRange.upperBound...]
    return SyntaxText(rebasing: trailingSlice)
  }
}

extension RawSyntaxData.MaterializedToken {
var leadingTrivia: RawTriviaPieceBuffer {
RawTriviaPieceBuffer(rebasing: triviaPieces[..<Int(numLeadingTrivia)])
Expand Down Expand Up @@ -94,6 +124,8 @@ extension RawSyntax {
switch rawData.payload {
case .materializedToken(let dat):
return dat.tokenKind
case .parsedToken(let dat):
return dat.tokenKind
case .layout(_):
preconditionFailure("'tokenKind' is not available for non-token node")
}
Expand All @@ -102,6 +134,8 @@ extension RawSyntax {
/// Token text of this node assuming this node is a token.
var rawTokenText: SyntaxText {
switch rawData.payload {
case .parsedToken(let dat):
return dat.tokenText
case .materializedToken(let dat):
return dat.tokenText
case .layout(_):
Expand All @@ -112,6 +146,8 @@ extension RawSyntax {
/// The UTF-8 byte length of the leading trivia, assuming this node is a token.
var tokenLeadingTriviaByteLength: Int {
switch rawData.payload {
case .parsedToken(let dat):
return dat.leadingTriviaText.count
case .materializedToken(let dat):
return dat.leadingTrivia.reduce(0) { $0 + $1.byteLength }
case .layout(_):
Expand All @@ -122,26 +158,32 @@ extension RawSyntax {
/// The UTF-8 byte length of the trailing trivia, assuming this node is a token.
///
/// Calling this on a layout node is a precondition failure.
var tokenTrailingTriviaByteLength: Int {
  switch rawData.payload {
  case .layout(_):
    preconditionFailure("'tokenTrailingTriviaByteLength' is not available for non-token node")
  case .parsedToken(let parsed):
    // The trailing trivia is still plain text here, so its length is the text's count.
    return parsed.trailingTriviaText.count
  case .materializedToken(let materialized):
    // Add up the byte lengths of the individual trailing trivia pieces.
    var total = 0
    for piece in materialized.trailingTrivia {
      total += piece.byteLength
    }
    return total
  }
}

var tokenLeadingRawTriviaPieces: RawTriviaPieceBuffer {
var tokenLeadingRawTriviaPieces: [RawTriviaPiece] {
switch rawData.payload {
case .parsedToken(let dat):
return self.arena.parseTrivia(source: dat.leadingTriviaText, position: .leading)
case .materializedToken(let dat):
return dat.leadingTrivia
return Array(dat.leadingTrivia)
case .layout(_):
preconditionFailure("'tokenLeadingRawTriviaPieces' is called on non-token raw syntax")
}
}

var tokenTrailingRawTriviaPieces: RawTriviaPieceBuffer {
var tokenTrailingRawTriviaPieces: [RawTriviaPiece] {
switch rawData.payload {
case .parsedToken(let dat):
return self.arena.parseTrivia(source: dat.trailingTriviaText, position: .trailing)
case .materializedToken(let dat):
return dat.trailingTrivia
return Array(dat.trailingTrivia)
case .layout(_):
preconditionFailure("'tokenTrailingRawTriviaPieces' is called on non-token raw syntax")
}
Expand All @@ -152,6 +194,7 @@ extension RawSyntax {
/// The syntax kind of this raw syntax.
var kind: SyntaxKind {
switch rawData.payload {
case .parsedToken(_): return .token
case .materializedToken(_): return .token
case .layout(let dat): return dat.kind
}
Expand All @@ -175,8 +218,11 @@ extension RawSyntax {
/// Child nodes.
var children: RawSyntaxBuffer {
switch rawData.payload {
case .materializedToken(_): return .init(start: nil, count: 0)
case .layout(let dat): return dat.layout
case .parsedToken(_),
.materializedToken(_):
return .init(start: nil, count: 0)
case .layout(let dat):
return dat.layout
}
}

Expand All @@ -197,8 +243,11 @@ extension RawSyntax {
/// Total number of nodes in this sub-tree, including `self` node.
var totalNodes: Int {
switch rawData.payload {
case .materializedToken(_): return 1
case .layout(let dat): return dat.descendantCount + 1
case .parsedToken(_),
.materializedToken(_):
return 1
case .layout(let dat):
return dat.descendantCount + 1
}
}

Expand Down Expand Up @@ -226,6 +275,7 @@ extension RawSyntax {
/// Sum of text byte lengths of all descendant token nodes.
var byteLength: Int {
switch rawData.payload {
case .parsedToken(let dat): return dat.wholeText.count
case .materializedToken(let dat): return Int(dat.byteLength)
case .layout(let dat): return dat.byteLength
}
Expand All @@ -237,6 +287,8 @@ extension RawSyntax {

func formTokenKind() -> TokenKind? {
switch rawData.payload {
case .parsedToken(let dat):
return TokenKind.fromRaw(kind: dat.tokenKind, text: dat.tokenText)
case .materializedToken(let dat):
return TokenKind.fromRaw(kind: dat.tokenKind, text: dat.tokenText)
case .layout(_):
Expand All @@ -255,13 +307,13 @@ extension RawSyntax {
}

/// Returns the leading `Trivia`, assuming this node is a token.
func formTokenLeadingTrivia() -> Trivia? {
func formTokenLeadingTrivia() -> Trivia {
return Trivia(pieces: tokenLeadingRawTriviaPieces.map({ TriviaPiece(raw: $0) }))
}

/// Returns the trailing `Trivia`, assuming this node is a token.
/// - Precondition: This node must be a token; calling this on a layout node traps instead of returning.
func formTokenTrailingTrivia() -> Trivia? {
func formTokenTrailingTrivia() -> Trivia {
return Trivia(pieces: tokenTrailingRawTriviaPieces.map({ TriviaPiece(raw: $0) }))
}

Expand All @@ -280,7 +332,14 @@ extension RawSyntax {
/// Assuming this node is a token, returns a `RawSyntax` node with the same
/// source text but with the token kind changed to `newValue`.
func withTokenKind(_ newValue: TokenKind) -> RawSyntax {
switch payload {
switch rawData.payload {
case .parsedToken(_):
// The wholeText can't be contiguous anymore. Make a materialized token.
return .makeMaterializedToken(
kind: newValue,
leadingTrivia: formTokenLeadingTrivia(),
trailingTrivia: formTokenTrailingTrivia(),
arena: arena)
case .materializedToken(var payload):
let decomposed = newValue.decomposeToRaw()
let rawKind = decomposed.rawKind
Expand All @@ -304,7 +363,7 @@ extension RawSyntax {
return .makeMaterializedToken(
kind: formTokenKind()!,
leadingTrivia: leadingTrivia,
trailingTrivia: formTokenTrailingTrivia()!,
trailingTrivia: formTokenTrailingTrivia(),
arena: arena)
}

Expand All @@ -324,7 +383,7 @@ extension RawSyntax {
if isToken {
return .makeMaterializedToken(
kind: formTokenKind()!,
leadingTrivia: formTokenLeadingTrivia()!,
leadingTrivia: formTokenLeadingTrivia(),
trailingTrivia: trailingTrivia,
arena: arena)
}
Expand Down Expand Up @@ -517,6 +576,9 @@ extension RawSyntax: TextOutputStreamable, CustomStringConvertible {
/// - Parameter stream: The stream on which to output this node.
public func write<Target: TextOutputStream>(to target: inout Target) {
switch rawData.payload {
case .parsedToken(let dat):
String(syntaxText: dat.wholeText).write(to: &target)
break
case .materializedToken(let dat):
for p in dat.leadingTrivia { p.write(to: &target) }
String(syntaxText: dat.tokenText).write(to: &target)
Expand Down Expand Up @@ -594,6 +656,8 @@ extension RawSyntax {
/// is a token node.
var tokenTextByteLength: Int {
switch rawData.payload {
case .parsedToken(let dat):
return dat.tokenText.count
case .materializedToken(let dat):
return dat.tokenText.count
case .layout(_):
Expand Down Expand Up @@ -640,6 +704,24 @@ private func makeRawTriviaPieces(leadingTrivia: Trivia, trailingTrivia: Trivia,
}

extension RawSyntax {
/// "Designated" factory method to create a parsed token node.
///
/// - Parameters:
///   - kind: Token kind.
///   - wholeText: Whole text of this token including leading/trailing trivia.
///   - textRange: Range of the token text in `wholeText`.
///   - arena: SyntaxArena in which the result node data resides.
/// - Returns: A `RawSyntax` whose payload is a `.parsedToken` built from the
///   given kind, text and range.
internal static func parsedToken(
  kind: RawTokenKind,
  wholeText: SyntaxText,
  textRange: Range<SyntaxText.Index>,
  arena: SyntaxArena
) -> RawSyntax {
  return RawSyntax(
    arena: arena,
    payload: .parsedToken(
      RawSyntaxData.ParsedToken(
        tokenKind: kind,
        wholeText: wholeText,
        textRange: textRange)))
}

/// "Designated" factory method to create a materialized token node.
///
/// This should not be called directly.
Expand Down Expand Up @@ -864,6 +946,11 @@ extension RawSyntax: CustomDebugStringConvertible {
private func debugWrite<Target: TextOutputStream>(to target: inout Target, indent: Int, withChildren: Bool = false) {
let childIndent = indent + 2
switch rawData.payload {
case .parsedToken(let dat):
target.write(".parsedToken(")
target.write(String(describing: dat.tokenKind))
target.write(" wholeText=\(dat.tokenText.debugDescription)")
target.write(" textRange=\(dat.textRange.description)")
case .materializedToken(let dat):
target.write(".materializedToken(")
target.write(String(describing: dat.tokenKind))
Expand Down
29 changes: 28 additions & 1 deletion Sources/SwiftSyntax/RawSyntaxNodeProtocol.swift
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ public struct RawTokenSyntax: RawSyntaxNodeProtocol {
return raw.rawTokenText
}

/// Byte length of this token, forwarded from the underlying raw node.
public var byteLength: Int {
  raw.byteLength
}

public var presence: SourcePresence {
raw.presence
}
Expand All @@ -94,7 +98,30 @@ public struct RawTokenSyntax: RawSyntaxNodeProtocol {
presence == .missing
}

/// Creates a `TokenSyntax`. `text` and trivia must be managed by the same
/// Leading trivia pieces of this token, as reported by the underlying raw node.
public var leadingTriviaPieces: [RawTriviaPiece] {
  return raw.tokenLeadingRawTriviaPieces
}

/// Trailing trivia pieces of this token, as reported by the underlying raw node.
public var trailingTriviaPieces: [RawTriviaPiece] {
  return raw.tokenTrailingRawTriviaPieces
}

/// Creates a `RawTokenSyntax` backed by a parsed token node.
///
/// - Parameters:
///   - kind: Raw kind of the token.
///   - wholeText: Text of the token including its leading and trailing
///     trivia. Must be managed by `arena` (checked with `assert`).
///   - textRange: Range of the token text in `wholeText`.
///   - arena: Arena that manages `wholeText` and receives the new node.
public init(
  kind: RawTokenKind,
  wholeText: SyntaxText,
  textRange: Range<SyntaxText.Index>,
  arena: SyntaxArena
) {
  assert(
    arena.contains(text: wholeText),
    "token text must be managed by the arena")
  self = RawTokenSyntax(
    raw: RawSyntax.parsedToken(
      kind: kind,
      wholeText: wholeText,
      textRange: textRange,
      arena: arena))
}

/// Creates a `RawTokenSyntax`. `text` and trivia must be managed by the same
/// `arena`.
public init(
kind: RawTokenKind,
Expand Down
5 changes: 3 additions & 2 deletions Sources/SwiftSyntax/Syntax.swift
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,14 @@ public protocol SyntaxProtocol: CustomStringConvertible,
var syntaxNodeType: SyntaxProtocol.Type { get }
}

internal extension SyntaxProtocol {
extension SyntaxProtocol {
var data: SyntaxData {
return _syntaxNode.data
}

/// Access the raw syntax assuming the node is a Syntax.
var raw: RawSyntax {
@_spi(RawSyntax)
public var raw: RawSyntax {
return _syntaxNode.data.raw
}
}
Expand Down
22 changes: 21 additions & 1 deletion Sources/SwiftSyntax/SyntaxArena.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
//===----------------------------------------------------------------------===//

public class SyntaxArena {

/// Signature of the function an arena uses to parse trivia text into pieces.
///
/// - Parameters:
///   - source: The trivia text to parse.
///   - position: Whether `source` is leading or trailing trivia.
/// - Returns: The trivia pieces parsed from `source`.
@_spi(RawSyntax)
public typealias ParseTriviaFunction = (_ source: SyntaxText, _ position: TriviaPosition) -> [RawTriviaPiece]

/// Bump-pointer allocator for all "intern" methods.
private var allocator: BumpPtrAllocator
/// Source file buffer the Syntax tree represents.
Expand All @@ -22,12 +26,19 @@ public class SyntaxArena {
/// Whether or not this arena has been added to other arenas as a child.
/// Used to make sure we don’t introduce retain cycles between arenas.
private var hasParent: Bool
/// Function that `parseTrivia(source:position:)` forwards to in order to turn
/// trivia text into trivia pieces.
private var parseTriviaFunction: ParseTriviaFunction

public init() {
/// Creates an arena with no children and an empty source buffer.
///
/// - Parameter parseTriviaFunction: Function the arena calls from
///   `parseTrivia(source:position:)` to parse trivia text.
@_spi(RawSyntax)
public init(parseTriviaFunction: @escaping ParseTriviaFunction) {
  self.parseTriviaFunction = parseTriviaFunction
  self.allocator = BumpPtrAllocator()
  self.sourceBuffer = .init(start: nil, count: 0)
  self.children = []
  self.hasParent = false
}

/// Creates an arena without trivia parsing support.
///
/// The arena is given the default trivia parsing function, which fails a
/// precondition when called.
public convenience init() {
  self.init(parseTriviaFunction: { source, position in
    _defaultParseTriviaFunction(source, position)
  })
}

/// Copies a source buffer in to the memory this arena manages, and returns
Expand Down Expand Up @@ -145,6 +156,11 @@ public class SyntaxArena {
sourceBufferContains(text.baseAddress!) ||
allocator.contains(address: text.baseAddress!))
}

/// Parses trivia text using the function this arena was created with.
///
/// - Parameters:
///   - source: The trivia text to parse.
///   - position: Whether `source` is leading or trailing trivia.
/// - Returns: Whatever the arena's trivia parsing function returns for the
///   given arguments.
@_spi(RawSyntax)
public func parseTrivia(source: SyntaxText, position: TriviaPosition) -> [RawTriviaPiece] {
  let parse = parseTriviaFunction
  return parse(source, position)
}
}

extension SyntaxArena: Hashable {
Expand Down Expand Up @@ -184,3 +200,7 @@ extension SyntaxArena {
//@available(*, deprecated, message: ".default SyntaxArena is subject to remove soon")
public static let `default` = SyntaxArena()
}

/// Placeholder trivia parser used by arenas created via `SyntaxArena.init()`.
///
/// It never returns: both arguments are ignored and the call always fails a
/// precondition.
private func _defaultParseTriviaFunction(_: SyntaxText, _: TriviaPosition) -> [RawTriviaPiece] {
  preconditionFailure("Trivia parsing not supported")
}
4 changes: 2 additions & 2 deletions Sources/SwiftSyntax/SyntaxClassifier.swift
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,9 @@ fileprivate struct FastTokenSequence: Sequence {
/// Provides a sequence of `SyntaxClassifiedRange`s for a token.
fileprivate struct TokenClassificationIterator: IteratorProtocol {
enum State {
case atLeadingTrivia(RawTriviaPieceBuffer, Int)
case atLeadingTrivia([RawTriviaPiece], Int)
case atTokenText
case atTrailingTrivia(RawTriviaPieceBuffer, Int)
case atTrailingTrivia([RawTriviaPiece], Int)
}

let token: AbsoluteNode
Expand Down
Loading