Skip to content

Commit ce8fa85

Browse files
authored
Merge pull request #98 from akyrtzi/opt5-classifier
[classifier] Provide a more efficient classification mechanism
2 parents 8e45b00 + 7785499 commit ce8fa85

14 files changed

+963
-219
lines changed

Sources/SwiftSyntax/RawSyntax.swift

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,55 @@ fileprivate struct TokenData {
282282
}
283283
}
284284

285+
/// Invokes `body` with the text info of this token.
///
/// For a parsed token the info is obtained from the parsed data through
/// `getTokenText(relativeOffset:)`. Otherwise the constructed token's kind
/// is asked to provide it.
/// - Parameters:
///   - relativeOffset: Forwarded to `getTokenText(relativeOffset:)` for
///     parsed tokens; not used for constructed tokens.
///   - length: Passed to `parsedData(length:extraPtr:)`.
///   - extraPtr: Pointer to the data elements backing this token.
///   - body: The closure receiving the `UnsafeTokenText` value.
/// - Returns: Return value of `body`.
fileprivate func withUnsafeTokenText<Result>(
  relativeOffset: Int,
  length: UInt32,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTokenText) -> Result
) -> Result {
  guard isParsed else {
    // Constructed token: the token kind supplies its own text.
    let constructed: ConstructedTokenData = castElementAs(extraPtr).pointee
    return constructed.kind.withUnsafeTokenText(body)
  }
  let parsed = parsedData(length: length, extraPtr: extraPtr)
  let tokenText = parsed.getTokenText(relativeOffset: relativeOffset)
  return body(tokenText)
}
299+
300+
/// Invokes `body` with the leading trivia piece at `index`, or with `nil`
/// when there is no piece at that index.
/// - Parameters:
///   - index: Index of the requested leading trivia piece.
///   - relativeOffset: Forwarded to
///     `getLeadingTriviaPiece(at:relativeOffset:)` for parsed tokens; not
///     used for constructed tokens.
///   - length: Passed to `parsedData(length:extraPtr:)`.
///   - extraPtr: Pointer to the data elements backing this token.
///   - body: The closure receiving the optional `UnsafeTriviaPiece`.
/// - Returns: Return value of `body`.
fileprivate func withUnsafeLeadingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  length: UInt32,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  guard isParsed else {
    // Constructed token: look the piece up in the stored leading trivia.
    let constructed: ConstructedTokenData = castElementAs(extraPtr).pointee
    if index >= constructed.leadingTrivia.count {
      return body(nil)
    }
    return constructed.leadingTrivia[index].withUnsafeTriviaPiece(body)
  }
  let parsed = parsedData(length: length, extraPtr: extraPtr)
  let piece = parsed.getLeadingTriviaPiece(at: index, relativeOffset: relativeOffset)
  return body(piece)
}
316+
317+
/// Invokes `body` with the trailing trivia piece at `index`, or with `nil`
/// when there is no piece at that index.
/// - Parameters:
///   - index: Index of the requested trailing trivia piece.
///   - relativeOffset: Forwarded to
///     `getTrailingTriviaPiece(at:relativeOffset:)` for parsed tokens; not
///     used for constructed tokens.
///   - length: Passed to `parsedData(length:extraPtr:)`.
///   - extraPtr: Pointer to the data elements backing this token.
///   - body: The closure receiving the optional `UnsafeTriviaPiece`.
/// - Returns: Return value of `body`.
fileprivate func withUnsafeTrailingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  length: UInt32,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  guard isParsed else {
    // Constructed token: look the piece up in the stored trailing trivia.
    let constructed: ConstructedTokenData = castElementAs(extraPtr).pointee
    if index >= constructed.trailingTrivia.count {
      return body(nil)
    }
    return constructed.trailingTrivia[index].withUnsafeTriviaPiece(body)
  }
  let parsed = parsedData(length: length, extraPtr: extraPtr)
  let piece = parsed.getTrailingTriviaPiece(at: index, relativeOffset: relativeOffset)
  return body(piece)
}
333+
285334
/// Prints the RawSyntax token.
286335
fileprivate func write<Target>(
287336
to target: inout Target, length: UInt32, extraPtr: DataElementPtr
@@ -368,6 +417,42 @@ fileprivate struct UnsafeParsedTokenData {
368417
return .init(pieces: newPieces)
369418
}
370419

420+
/// Builds the `UnsafeTokenText` for this parsed token.
///
/// `relativeOffset` is treated as the length of the leading trivia, so the
/// token text length is the full length minus the leading and trailing
/// trivia lengths.
/// - Parameter relativeOffset: Offset at which the token text starts; also
///   used as the leading trivia length.
/// - Returns: The token kind, its text length and its custom text. The
///   custom text is `emptyStringBuffer` when `fullTextBuffer` is empty.
func getTokenText(relativeOffset: Int) -> UnsafeTokenText {
  // Everything that is not leading or trailing trivia is token text.
  let triviaLength = relativeOffset + self.getTrailingTriviaLength()
  let textLength = Int(length) - triviaLength

  let text: UnsafeBufferPointer<UInt8>
  if fullTextBuffer.isEmpty {
    text = emptyStringBuffer
  } else {
    text = getTextSlice(start: relativeOffset, length: textLength)
  }
  return UnsafeTokenText(
    kind: .fromRawValue(tokenKind), length: textLength, customText: text)
}
428+
429+
/// Returns the leading trivia piece at `index`, looked up in
/// `leadingTriviaBuffer`, or `nil` if `index` is past the end of it.
/// - Parameters:
///   - index: Index of the requested piece.
///   - relativeOffset: Start offset used to slice the piece's text.
func getLeadingTriviaPiece(
  at index: Int, relativeOffset: Int
) -> UnsafeTriviaPiece? {
  let pieces = leadingTriviaBuffer
  return getTriviaPiece(at: index, relativeOffset: relativeOffset, trivia: pieces)
}
435+
436+
/// Returns the trailing trivia piece at `index`, looked up in
/// `trailingTriviaBuffer`, or `nil` if `index` is past the end of it.
/// - Parameters:
///   - index: Index of the requested piece.
///   - relativeOffset: Start offset used to slice the piece's text.
func getTrailingTriviaPiece(
  at index: Int, relativeOffset: Int
) -> UnsafeTriviaPiece? {
  let pieces = trailingTriviaBuffer
  return getTriviaPiece(at: index, relativeOffset: relativeOffset, trivia: pieces)
}
442+
443+
/// Shared lookup for leading and trailing trivia pieces.
/// - Parameters:
///   - index: Index of the requested piece inside `trivia`.
///   - relativeOffset: Start offset used to slice the piece's text.
///   - trivia: The buffer of C trivia pieces to index into.
/// - Returns: The piece's kind, length and custom text, or `nil` when
///   `index` is not below `trivia.count`. The custom text is
///   `emptyStringBuffer` when `fullTextBuffer` is empty.
private func getTriviaPiece(
  at index: Int,
  relativeOffset: Int,
  trivia: UnsafeBufferPointer<CTriviaPiece>
) -> UnsafeTriviaPiece? {
  if index >= trivia.count {
    return nil
  }
  let rawPiece = trivia[index]
  let pieceLength = Int(rawPiece.length)

  let text: UnsafeBufferPointer<UInt8>
  if fullTextBuffer.isEmpty {
    text = emptyStringBuffer
  } else {
    text = getTextSlice(start: relativeOffset, length: pieceLength)
  }
  return UnsafeTriviaPiece(
    kind: .fromRawValue(rawPiece.kind), length: pieceLength, customText: text)
}
455+
371456
func write<Target>(
372457
to target: inout Target
373458
) where Target: TextOutputStream {
@@ -719,6 +804,47 @@ struct RawSyntaxBase {
719804
}
720805
}
721806

807+
/// Invokes `body` with the token text info if this node is a token, or with
/// `nil` if it is a layout node.
/// - Parameters:
///   - relativeOffset: Forwarded to the token data.
///   - extraPtr: Pointer to the data elements backing this node.
///   - body: The closure receiving the optional `UnsafeTokenText`.
/// - Returns: Return value of `body`.
fileprivate func withUnsafeTokenText<Result>(
  relativeOffset: Int,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTokenText?) -> Result
) -> Result {
  switch data {
  case .layout(_):
    // Layout nodes carry no token text.
    return body(nil)
  case .token(let tokenData):
    return tokenData.withUnsafeTokenText(
      relativeOffset: relativeOffset,
      length: byteLength,
      extraPtr: extraPtr,
      body)
  }
}
819+
820+
/// Invokes `body` with the leading trivia piece at `index` if this node is
/// a token, or with `nil` if it is a layout node.
/// - Parameters:
///   - index: Index of the requested leading trivia piece.
///   - relativeOffset: Forwarded to the token data.
///   - extraPtr: Pointer to the data elements backing this node.
///   - body: The closure receiving the optional `UnsafeTriviaPiece`.
/// - Returns: Return value of `body`.
fileprivate func withUnsafeLeadingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  switch data {
  case .layout(_):
    // Layout nodes have no trivia of their own.
    return body(nil)
  case .token(let tokenData):
    return tokenData.withUnsafeLeadingTriviaPiece(
      at: index,
      relativeOffset: relativeOffset,
      length: byteLength,
      extraPtr: extraPtr,
      body)
  }
}
833+
834+
/// Invokes `body` with the trailing trivia piece at `index` if this node is
/// a token, or with `nil` if it is a layout node.
/// - Parameters:
///   - index: Index of the requested trailing trivia piece.
///   - relativeOffset: Forwarded to the token data.
///   - extraPtr: Pointer to the data elements backing this node.
///   - body: The closure receiving the optional `UnsafeTriviaPiece`.
/// - Returns: Return value of `body`.
fileprivate func withUnsafeTrailingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  extraPtr: DataElementPtr,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  switch data {
  case .layout(_):
    // Layout nodes have no trivia of their own.
    return body(nil)
  case .token(let tokenData):
    return tokenData.withUnsafeTrailingTriviaPiece(
      at: index,
      relativeOffset: relativeOffset,
      length: byteLength,
      extraPtr: extraPtr,
      body)
  }
}
847+
722848
/// Prints the RawSyntax token. If self is a layout node it does nothing.
723849
fileprivate func writeToken<Target>(
724850
to target: inout Target, extraPtr: DataElementPtr
@@ -909,6 +1035,64 @@ final class RawSyntax: ManagedBuffer<RawSyntaxBase, RawSyntaxDataElement> {
9091035
}
9101036
}
9111037

1038+
/// Passes token info to the provided closure as `UnsafeTokenText`.
1039+
/// - Parameters:
1040+
/// - relativeOffset: For efficiency, the caller keeps track of the relative
1041+
/// byte offset (from start of leading trivia) of the token text, to avoid
1042+
/// calculating it within this function.
1043+
/// - body: The closure that accepts the `UnsafeTokenText` value. This value
1044+
/// must not escape the closure.
1045+
/// - Returns: Return value of `body`.
1046+
func withUnsafeTokenText<Result>(
  relativeOffset: Int,
  _ body: (UnsafeTokenText?) -> Result
) -> Result {
  // Hand the header and the element storage of this buffer to the header's
  // implementation.
  return withUnsafeMutablePointers { header, elements in
    header.pointee.withUnsafeTokenText(
      relativeOffset: relativeOffset, extraPtr: elements, body)
  }
}
1055+
1056+
/// Passes trivia piece info to the provided closure as `UnsafeTriviaPiece`.
1057+
/// - Parameters:
1058+
/// - at: The index for the trivia piece.
1059+
/// - relativeOffset: For efficiency, the caller keeps track of the relative
1060+
/// byte offset (from start of leading trivia) of the trivia piece text,
1061+
/// to avoid calculating it within this function.
1062+
/// - body: The closure that accepts the `UnsafeTriviaPiece` value. This value
1063+
/// must not escape the closure.
1064+
/// - Returns: Return value of `body`.
1065+
func withUnsafeLeadingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  // Hand the header and the element storage of this buffer to the header's
  // implementation.
  return withUnsafeMutablePointers { header, elements in
    header.pointee.withUnsafeLeadingTriviaPiece(
      at: index, relativeOffset: relativeOffset, extraPtr: elements, body)
  }
}
1075+
1076+
/// Passes trivia piece info to the provided closure as `UnsafeTriviaPiece`.
1077+
/// - Parameters:
1078+
/// - at: The index for the trivia piece.
1079+
/// - relativeOffset: For efficiency, the caller keeps track of the relative
1080+
/// byte offset (from start of leading trivia) of the trivia piece text,
1081+
/// to avoid calculating it within this function.
1082+
/// - body: The closure that accepts the `UnsafeTriviaPiece` value. This value
1083+
/// must not escape the closure.
1084+
/// - Returns: Return value of `body`.
1085+
func withUnsafeTrailingTriviaPiece<Result>(
  at index: Int,
  relativeOffset: Int,
  _ body: (UnsafeTriviaPiece?) -> Result
) -> Result {
  // Hand the header and the element storage of this buffer to the header's
  // implementation.
  return withUnsafeMutablePointers { header, elements in
    header.pointee.withUnsafeTrailingTriviaPiece(
      at: index, relativeOffset: relativeOffset, extraPtr: elements, body)
  }
}
1095+
9121096
func formLayoutArray() -> [RawSyntax?] {
9131097
return withUnsafeMutablePointers {
9141098
$0.pointee.formLayoutArray(extraPtr: $1)
@@ -1121,3 +1305,51 @@ extension RawSyntax {
11211305
trailingTrivia: trailingTrivia, length: length, presence: presence)
11221306
}
11231307
}
1308+
1309+
/// Token info with its custom text as `UnsafeBufferPointer`. This is only safe
1310+
/// to use from within the `withUnsafeTokenText` methods.
1311+
internal struct UnsafeTokenText {
  /// The kind of the token.
  let kind: RawTokenKind
  /// The length recorded for the token text.
  let length: Int
  /// The custom text of the token; a `nil`-based, zero-count buffer when
  /// none was supplied.
  let customText: UnsafeBufferPointer<UInt8>

  /// Creates token info with the given custom text.
  init(kind: RawTokenKind, length: Int, customText: UnsafeBufferPointer<UInt8>) {
    self.kind = kind
    self.length = length
    self.customText = customText
  }

  /// Creates token info without custom text.
  init(kind: RawTokenKind, length: Int) {
    self.init(
      kind: kind,
      length: length,
      customText: UnsafeBufferPointer(start: nil, count: 0))
  }
}
1328+
1329+
/// Trivia piece info with its custom text as `UnsafeBufferPointer`. This is
1330+
/// only safe to use from within the `withUnsafeLeadingTriviaPiece` and
1331+
/// `withUnsafeTrailingTriviaPiece` methods.
1332+
internal struct UnsafeTriviaPiece {
  /// The kind of the trivia piece.
  let kind: TriviaPieceKind
  /// The length recorded for the trivia piece.
  let length: Int
  /// The custom text of the piece; a `nil`-based, zero-count buffer when
  /// none was supplied.
  let customText: UnsafeBufferPointer<UInt8>

  /// Creates trivia piece info with the given custom text.
  init(kind: TriviaPieceKind, length: Int, customText: UnsafeBufferPointer<UInt8>) {
    self.kind = kind
    self.length = length
    self.customText = customText
  }

  /// Creates trivia piece info without custom text.
  init(kind: TriviaPieceKind, length: Int) {
    self.init(
      kind: kind,
      length: length,
      customText: UnsafeBufferPointer(start: nil, count: 0))
  }

  /// Creates trivia piece info from a C trivia piece, taking the kind and
  /// length from `piece` and using `textBuffer` as the custom text.
  static func fromRawValue(
    _ piece: CTriviaPiece, textBuffer: UnsafeBufferPointer<UInt8>
  ) -> UnsafeTriviaPiece {
    let pieceKind: TriviaPieceKind = .fromRawValue(piece.kind)
    let pieceLength = Int(piece.length)
    return UnsafeTriviaPiece(
      kind: pieceKind, length: pieceLength, customText: textBuffer)
  }
}

Sources/SwiftSyntax/Syntax.swift

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,21 @@ extension _SyntaxBase {
250250
return TokenSequence(self)
251251
}
252252

253+
/// Sequence of `SyntaxClassifiedRange`s for this syntax node.
254+
var classifications: SyntaxClassifications {
  get {
    // Cover the node from relative offset 0 through its full byte size.
    return SyntaxClassifications(
      self, in: ByteSourceRange(offset: 0, length: byteSize))
  }
}
258+
259+
/// Sequence of `SyntaxClassifiedRange`s contained in this syntax node within
260+
/// a relative range.
261+
/// - Parameters:
262+
/// - in: The relative byte range to pull `SyntaxClassifiedRange`s from.
263+
/// - Returns: Sequence of `SyntaxClassifiedRange`s.
264+
func classifications(in range: ByteSourceRange) -> SyntaxClassifications {
  // The sequence is built from this node and the requested relative range.
  let classified = SyntaxClassifications(self, in: range)
  return classified
}
267+
253268
/// Returns a value representing the unique identity of the node.
254269
var uniqueIdentifier: SyntaxIdentifier {
255270
return data.nodeId
@@ -462,6 +477,20 @@ extension Syntax {
462477
return base.tokens
463478
}
464479

480+
/// Sequence of `SyntaxClassifiedRange`s for this syntax node.
481+
public var classifications: SyntaxClassifications {
  get {
    // Forwarded to the `base` implementation.
    return base.classifications
  }
}
484+
485+
/// Sequence of `SyntaxClassifiedRange`s contained in this syntax node within
486+
/// a relative range.
487+
/// - Parameters:
488+
/// - in: The relative byte range to pull `SyntaxClassifiedRange`s from.
489+
/// - Returns: Sequence of `SyntaxClassifiedRange`s.
490+
public func classifications(in range: ByteSourceRange) -> SyntaxClassifications {
  // Forwarded to the `base` implementation.
  let classified = base.classifications(in: range)
  return classified
}
493+
465494
/// Returns a value representing the unique identity of the node.
466495
public var uniqueIdentifier: SyntaxIdentifier {
467496
return base.uniqueIdentifier
@@ -501,6 +530,10 @@ public struct TokenSyntax: _SyntaxBase, Hashable {
501530
self.data = data
502531
}
503532

533+
/// The source presence of this token, as recorded on its underlying raw
/// syntax node.
public var presence: SourcePresence {
  get {
    return raw.presence
  }
}
536+
504537
/// The text of the token as written in the source code.
505538
public var text: String {
506539
return tokenKind.text

0 commit comments

Comments
 (0)