Skip to content

[classifier] Provide a more efficient classification mechanism #98

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 232 additions & 0 deletions Sources/SwiftSyntax/RawSyntax.swift
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,55 @@ fileprivate struct TokenData {
}
}

/// Invokes `body` with the token text info for this token and returns the
/// closure's result.
///
/// When `isParsed` is true the info is read from the parsed data formed from
/// `length` and `extraPtr`, using `relativeOffset`. Otherwise it is obtained
/// from the `kind` of the `ConstructedTokenData` stored behind `extraPtr`, and
/// `relativeOffset` is not consulted.
fileprivate func withUnsafeTokenText<Result>(
  relativeOffset: Int,
  length: UInt32,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTokenText) -> Result
) -> Result {
  guard isParsed else {
    // Constructed token: the token kind supplies the text info.
    let constructed: ConstructedTokenData = castElementAs(extraPtr).pointee
    return constructed.kind.withUnsafeTokenText(body)
  }
  let parsed = parsedData(length: length, extraPtr: extraPtr)
  let tokenText = parsed.getTokenText(relativeOffset: relativeOffset)
  return body(tokenText)
}

/// Invokes `body` with the leading trivia piece at `index` and returns the
/// closure's result.
///
/// For a parsed token the piece is looked up in the parsed data using
/// `relativeOffset`. For a constructed token it is taken from the token's
/// `leadingTrivia`, and `body` receives `nil` when `index` is not below the
/// number of leading trivia pieces.
fileprivate func withUnsafeLeadingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  length: UInt32,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  guard isParsed else {
    let constructed: ConstructedTokenData = castElementAs(extraPtr).pointee
    if index < constructed.leadingTrivia.count {
      return constructed.leadingTrivia[index].withUnsafeTriviaPiece(body)
    }
    // No piece at this index.
    return body(nil)
  }
  let parsed = parsedData(length: length, extraPtr: extraPtr)
  let piece = parsed.getLeadingTriviaPiece(at: index, relativeOffset: relativeOffset)
  return body(piece)
}

/// Invokes `body` with the trailing trivia piece at `index` and returns the
/// closure's result.
///
/// For a parsed token the piece is looked up in the parsed data using
/// `relativeOffset`. For a constructed token it is taken from the token's
/// `trailingTrivia`, and `body` receives `nil` when `index` is not below the
/// number of trailing trivia pieces.
fileprivate func withUnsafeTrailingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  length: UInt32,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  guard isParsed else {
    let constructed: ConstructedTokenData = castElementAs(extraPtr).pointee
    if index < constructed.trailingTrivia.count {
      return constructed.trailingTrivia[index].withUnsafeTriviaPiece(body)
    }
    // No piece at this index.
    return body(nil)
  }
  let parsed = parsedData(length: length, extraPtr: extraPtr)
  let piece = parsed.getTrailingTriviaPiece(at: index, relativeOffset: relativeOffset)
  return body(piece)
}

/// Prints the RawSyntax token.
fileprivate func write<Target>(
to target: inout Target, length: UInt32, extraPtr: DataElementPtr
Expand Down Expand Up @@ -368,6 +417,42 @@ fileprivate struct UnsafeParsedTokenData {
return .init(pieces: newPieces)
}

/// Builds the `UnsafeTokenText` for this parsed token.
///
/// `relativeOffset` is used both as the leading trivia length and as the start
/// of the token text slice. The token length is `length` minus the leading and
/// trailing trivia lengths. When `fullTextBuffer` is empty the custom text is
/// `emptyStringBuffer` instead of a slice.
func getTokenText(relativeOffset: Int) -> UnsafeTokenText {
  let leadingLength = relativeOffset
  let trailingLength = self.getTrailingTriviaLength()
  let textLength = Int(length) - (leadingLength + trailingLength)

  let text: UnsafeBufferPointer<UInt8>
  if fullTextBuffer.isEmpty {
    text = emptyStringBuffer
  } else {
    text = getTextSlice(start: relativeOffset, length: textLength)
  }
  return UnsafeTokenText(
    kind: .fromRawValue(tokenKind),
    length: textLength,
    customText: text)
}

/// Returns the leading trivia piece at `index`, looked up in
/// `leadingTriviaBuffer`, or `nil` when `index` is not below the buffer's count.
/// `relativeOffset` is forwarded as the start of the piece's text slice.
func getLeadingTriviaPiece(at index: Int, relativeOffset: Int) -> UnsafeTriviaPiece? {
  let pieces = leadingTriviaBuffer
  return getTriviaPiece(at: index, relativeOffset: relativeOffset, trivia: pieces)
}

/// Returns the trailing trivia piece at `index`, looked up in
/// `trailingTriviaBuffer`, or `nil` when `index` is not below the buffer's count.
/// `relativeOffset` is forwarded as the start of the piece's text slice.
func getTrailingTriviaPiece(at index: Int, relativeOffset: Int) -> UnsafeTriviaPiece? {
  let pieces = trailingTriviaBuffer
  return getTriviaPiece(at: index, relativeOffset: relativeOffset, trivia: pieces)
}

/// Shared lookup behind the leading and trailing trivia piece accessors.
///
/// Returns `nil` when `index` is not below `trivia.count`. Otherwise builds an
/// `UnsafeTriviaPiece` from the C piece at `index`. Its custom text is the
/// slice starting at `relativeOffset` with the piece's length, or
/// `emptyStringBuffer` when `fullTextBuffer` is empty.
private func getTriviaPiece(
  at index: Int,
  relativeOffset: Int,
  trivia: UnsafeBufferPointer<CTriviaPiece>
) -> UnsafeTriviaPiece? {
  if index >= trivia.count {
    return nil
  }
  let rawPiece = trivia[index]
  let pieceLength = Int(rawPiece.length)

  let text: UnsafeBufferPointer<UInt8>
  if fullTextBuffer.isEmpty {
    text = emptyStringBuffer
  } else {
    text = getTextSlice(start: relativeOffset, length: pieceLength)
  }
  return UnsafeTriviaPiece(
    kind: .fromRawValue(rawPiece.kind),
    length: pieceLength,
    customText: text)
}

func write<Target>(
to target: inout Target
) where Target: TextOutputStream {
Expand Down Expand Up @@ -719,6 +804,47 @@ struct RawSyntaxBase {
}
}

/// Invokes `body` with the token text info of this node and returns the
/// closure's result.
///
/// `body` receives `nil` when this node's data is a layout rather than a token.
/// For tokens the call is forwarded to the token data together with this
/// node's `byteLength`.
fileprivate func withUnsafeTokenText<Result>(
  relativeOffset: Int,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTokenText?) -> Result
) -> Result {
  guard case .token(let tokenData) = data else {
    // Layout nodes carry no token text.
    return body(nil)
  }
  return tokenData.withUnsafeTokenText(
    relativeOffset: relativeOffset,
    length: byteLength,
    extraPtr: extraPtr,
    body)
}

/// Invokes `body` with the leading trivia piece at `index` of this node and
/// returns the closure's result.
///
/// `body` receives `nil` when this node's data is a layout rather than a token.
/// For tokens the call is forwarded to the token data together with this
/// node's `byteLength`.
fileprivate func withUnsafeLeadingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  guard case .token(let tokenData) = data else {
    // Layout nodes have no trivia of their own.
    return body(nil)
  }
  return tokenData.withUnsafeLeadingTriviaPiece(
    at: index,
    relativeOffset: relativeOffset,
    length: byteLength,
    extraPtr: extraPtr,
    body)
}

/// Invokes `body` with the trailing trivia piece at `index` of this node and
/// returns the closure's result.
///
/// `body` receives `nil` when this node's data is a layout rather than a token.
/// For tokens the call is forwarded to the token data together with this
/// node's `byteLength`.
fileprivate func withUnsafeTrailingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  guard case .token(let tokenData) = data else {
    // Layout nodes have no trivia of their own.
    return body(nil)
  }
  return tokenData.withUnsafeTrailingTriviaPiece(
    at: index,
    relativeOffset: relativeOffset,
    length: byteLength,
    extraPtr: extraPtr,
    body)
}

/// Prints the RawSyntax token. If self is a layout node it does nothing.
fileprivate func writeToken<Target>(
to target: inout Target, extraPtr: DataElementPtr
Expand Down Expand Up @@ -909,6 +1035,64 @@ final class RawSyntax: ManagedBuffer<RawSyntaxBase, RawSyntaxDataElement> {
}
}

/// Hands the token's info to `body` as an `UnsafeTokenText`.
///
/// `body` receives `nil` when this node is a layout node rather than a token.
///
/// - Parameters:
///   - relativeOffset: Relative byte offset (from the start of leading trivia)
///     of the token text. The caller tracks this for efficiency so it does
///     not have to be recomputed here.
///   - body: Closure receiving the `UnsafeTokenText` value. The value must not
///     escape the closure.
/// - Returns: Whatever `body` returns.
func withUnsafeTokenText<Result>(
  relativeOffset: Int,
  _ body: (UnsafeTokenText?) -> Result
) -> Result {
  return withUnsafeMutablePointers { header, elements in
    header.pointee.withUnsafeTokenText(
      relativeOffset: relativeOffset,
      extraPtr: elements,
      body)
  }
}

/// Hands a leading trivia piece's info to `body` as an `UnsafeTriviaPiece`.
///
/// `body` receives `nil` when this node is a layout node or when there is no
/// leading trivia piece at `index`.
///
/// - Parameters:
///   - index: The index of the trivia piece.
///   - relativeOffset: Relative byte offset (from the start of leading trivia)
///     of the trivia piece text. The caller tracks this for efficiency so it
///     does not have to be recomputed here.
///   - body: Closure receiving the `UnsafeTriviaPiece` value. The value must
///     not escape the closure.
/// - Returns: Whatever `body` returns.
func withUnsafeLeadingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  return withUnsafeMutablePointers { header, elements in
    header.pointee.withUnsafeLeadingTriviaPiece(
      at: index,
      relativeOffset: relativeOffset,
      extraPtr: elements,
      body)
  }
}

/// Hands a trailing trivia piece's info to `body` as an `UnsafeTriviaPiece`.
///
/// `body` receives `nil` when this node is a layout node or when there is no
/// trailing trivia piece at `index`.
///
/// - Parameters:
///   - index: The index of the trivia piece.
///   - relativeOffset: Relative byte offset (from the start of leading trivia)
///     of the trivia piece text. The caller tracks this for efficiency so it
///     does not have to be recomputed here.
///   - body: Closure receiving the `UnsafeTriviaPiece` value. The value must
///     not escape the closure.
/// - Returns: Whatever `body` returns.
func withUnsafeTrailingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  return withUnsafeMutablePointers { header, elements in
    header.pointee.withUnsafeTrailingTriviaPiece(
      at: index,
      relativeOffset: relativeOffset,
      extraPtr: elements,
      body)
  }
}

func formLayoutArray() -> [RawSyntax?] {
return withUnsafeMutablePointers {
$0.pointee.formLayoutArray(extraPtr: $1)
Expand Down Expand Up @@ -1114,3 +1298,51 @@ extension RawSyntax {
trailingTrivia: trailingTrivia, length: length, presence: presence)
}
}

/// Token info whose custom text is exposed as an `UnsafeBufferPointer`.
///
/// A value of this type is only safe to use from within the
/// `withUnsafeTokenText` methods.
internal struct UnsafeTokenText {
  /// Kind of the token.
  let kind: RawTokenKind
  /// Length of the token text.
  let length: Int
  /// Buffer with the token's custom text; has a `nil` base address and zero
  /// count when created through `init(kind:length:)`.
  let customText: UnsafeBufferPointer<UInt8>

  /// Creates token info with the given custom text buffer.
  init(kind: RawTokenKind, length: Int, customText: UnsafeBufferPointer<UInt8>) {
    self.kind = kind
    self.length = length
    self.customText = customText
  }

  /// Creates token info with an empty (`nil`, zero-count) custom text buffer.
  init(kind: RawTokenKind, length: Int) {
    self.init(
      kind: kind,
      length: length,
      customText: UnsafeBufferPointer<UInt8>(start: nil, count: 0))
  }
}

/// Trivia piece info whose custom text is exposed as an `UnsafeBufferPointer`.
///
/// A value of this type is only safe to use from within the
/// `withUnsafeLeadingTriviaPiece` and `withUnsafeTrailingTriviaPiece` methods.
internal struct UnsafeTriviaPiece {
  /// Kind of the trivia piece.
  let kind: TriviaPieceKind
  /// Length of the trivia piece.
  let length: Int
  /// Buffer with the piece's custom text; has a `nil` base address and zero
  /// count when created through `init(kind:length:)`.
  let customText: UnsafeBufferPointer<UInt8>

  /// Creates trivia piece info with the given custom text buffer.
  init(kind: TriviaPieceKind, length: Int, customText: UnsafeBufferPointer<UInt8>) {
    self.kind = kind
    self.length = length
    self.customText = customText
  }

  /// Creates trivia piece info with an empty (`nil`, zero-count) custom text
  /// buffer.
  init(kind: TriviaPieceKind, length: Int) {
    self.init(
      kind: kind,
      length: length,
      customText: UnsafeBufferPointer<UInt8>(start: nil, count: 0))
  }

  /// Builds trivia piece info from a C trivia piece, taking the kind and
  /// length from `piece` and using `textBuffer` as the custom text.
  static func fromRawValue(
    _ piece: CTriviaPiece, textBuffer: UnsafeBufferPointer<UInt8>
  ) -> UnsafeTriviaPiece {
    let pieceKind: TriviaPieceKind = .fromRawValue(piece.kind)
    let pieceLength = Int(piece.length)
    return UnsafeTriviaPiece(kind: pieceKind, length: pieceLength, customText: textBuffer)
  }
}
33 changes: 33 additions & 0 deletions Sources/SwiftSyntax/Syntax.swift
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,21 @@ extension _SyntaxBase {
return TokenSequence(self)
}

/// Sequence of `SyntaxClassifiedRange`s for this syntax node, covering the
/// relative byte range that starts at offset 0 and spans `byteSize` bytes.
var classifications: SyntaxClassifications {
  return SyntaxClassifications(
    self,
    in: ByteSourceRange(offset: 0, length: byteSize))
}

/// Sequence of `SyntaxClassifiedRange`s contained in this syntax node within
/// a relative range.
/// - Parameter range: The relative byte range to pull
/// `SyntaxClassifiedRange`s from.
/// - Returns: Sequence of `SyntaxClassifiedRange`s, created from this node and
/// `range`.
func classifications(in range: ByteSourceRange) -> SyntaxClassifications {
return SyntaxClassifications(self, in: range)
}

/// Returns a value representing the unique identity of the node.
var uniqueIdentifier: SyntaxIdentifier {
return data.nodeId
Expand Down Expand Up @@ -464,6 +479,20 @@ extension Syntax {
return base.tokens
}

/// Sequence of `SyntaxClassifiedRange`s for this syntax node.
///
/// Forwards to the `classifications` property of the underlying `base` node.
public var classifications: SyntaxClassifications {
return base.classifications
}

/// Sequence of `SyntaxClassifiedRange`s contained in this syntax node within
/// a relative range.
/// - Parameters:
/// - in: The relative byte range to pull `SyntaxClassifiedRange`s from.
/// - Returns: Sequence of `SyntaxClassifiedRange`s.
public func classifications(in range: ByteSourceRange) -> SyntaxClassifications {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we migrate the location structures here to SourceRange?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's easy to get a ByteSourceRange from SourceRange, the other way is not so convenient.
I would prefer not to require a SourceRange in this API but just add a var byteRange: ByteSourceRange property to SourceRange. If you have a SourceRange just pass its byteRange property to this API.

return base.classifications(in: range)
}

/// Returns a value representing the unique identity of the node.
public var uniqueIdentifier: SyntaxIdentifier {
return base.uniqueIdentifier
Expand Down Expand Up @@ -503,6 +532,10 @@ public struct TokenSyntax: _SyntaxBase, Hashable {
self.data = data
}

/// The source presence of this token, as reported by its underlying raw
/// syntax node.
public var presence: SourcePresence {
return raw.presence
}

/// The text of the token as written in the source code.
public var text: String {
return tokenKind.text
Expand Down
Loading