Skip to content

Commit 7785499

Browse files
committed
[classifier] Provide a more efficient classification mechanism
Provide 'classifications' as a sequence of (kind, range) values. This allows using the same type to classify source ranges across tokens and trivia pieces and makes the classification mechanism much more efficient since we avoid providing syntax nodes and the resulting casts to `_SyntaxBase` for establishing the parent hierarchy. The new classification mechanism is 10.2x faster than the previous mechanism, and 2.7x faster than using the SyntaxVisitor.
1 parent fc36c46 commit 7785499

14 files changed

+963
-219
lines changed

Sources/SwiftSyntax/RawSyntax.swift

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,55 @@ fileprivate struct TokenData {
282282
}
283283
}
284284

285+
fileprivate func withUnsafeTokenText<Result>(
286+
relativeOffset: Int,
287+
length: UInt32,
288+
extraPtr: DataElementPtr,
289+
_ body: (UnsafeTokenText) -> Result
290+
) -> Result {
291+
if isParsed {
292+
let data = parsedData(length: length, extraPtr: extraPtr)
293+
return body(data.getTokenText(relativeOffset: relativeOffset))
294+
} else {
295+
let tok: ConstructedTokenData = castElementAs(extraPtr).pointee
296+
return tok.kind.withUnsafeTokenText(body)
297+
}
298+
}
299+
300+
fileprivate func withUnsafeLeadingTriviaPiece<Result>(
301+
at index: Int,
302+
relativeOffset: Int,
303+
length: UInt32,
304+
extraPtr: DataElementPtr,
305+
_ body: (UnsafeTriviaPiece?) -> Result
306+
) -> Result {
307+
if isParsed {
308+
let data = parsedData(length: length, extraPtr: extraPtr)
309+
return body(data.getLeadingTriviaPiece(at: index, relativeOffset: relativeOffset))
310+
} else {
311+
let tok: ConstructedTokenData = castElementAs(extraPtr).pointee
312+
guard index < tok.leadingTrivia.count else { return body(nil) }
313+
return tok.leadingTrivia[index].withUnsafeTriviaPiece(body)
314+
}
315+
}
316+
317+
fileprivate func withUnsafeTrailingTriviaPiece<Result>(
318+
at index: Int,
319+
relativeOffset: Int,
320+
length: UInt32,
321+
extraPtr: DataElementPtr,
322+
_ body: (UnsafeTriviaPiece?) -> Result
323+
) -> Result {
324+
if isParsed {
325+
let data = parsedData(length: length, extraPtr: extraPtr)
326+
return body(data.getTrailingTriviaPiece(at: index, relativeOffset: relativeOffset))
327+
} else {
328+
let tok: ConstructedTokenData = castElementAs(extraPtr).pointee
329+
guard index < tok.trailingTrivia.count else { return body(nil) }
330+
return tok.trailingTrivia[index].withUnsafeTriviaPiece(body)
331+
}
332+
}
333+
285334
/// Prints the RawSyntax token.
286335
fileprivate func write<Target>(
287336
to target: inout Target, length: UInt32, extraPtr: DataElementPtr
@@ -368,6 +417,42 @@ fileprivate struct UnsafeParsedTokenData {
368417
return .init(pieces: newPieces)
369418
}
370419

420+
func getTokenText(relativeOffset: Int) -> UnsafeTokenText {
421+
let leadingTriviaLength = relativeOffset
422+
let trailingTriviaLength = self.getTrailingTriviaLength()
423+
let tokenLength = Int(length) - (leadingTriviaLength + trailingTriviaLength)
424+
let customText = fullTextBuffer.isEmpty ? emptyStringBuffer :
425+
getTextSlice(start: relativeOffset, length: tokenLength)
426+
return .init(kind: .fromRawValue(tokenKind), length: tokenLength, customText: customText)
427+
}
428+
429+
func getLeadingTriviaPiece(
430+
at index: Int, relativeOffset: Int
431+
) -> UnsafeTriviaPiece? {
432+
return getTriviaPiece(at: index, relativeOffset: relativeOffset,
433+
trivia: leadingTriviaBuffer)
434+
}
435+
436+
func getTrailingTriviaPiece(
437+
at index: Int, relativeOffset: Int
438+
) -> UnsafeTriviaPiece? {
439+
return getTriviaPiece(at: index, relativeOffset: relativeOffset,
440+
trivia: trailingTriviaBuffer)
441+
}
442+
443+
private func getTriviaPiece(
444+
at index: Int,
445+
relativeOffset: Int,
446+
trivia: UnsafeBufferPointer<CTriviaPiece>
447+
) -> UnsafeTriviaPiece? {
448+
guard index < trivia.count else { return nil }
449+
let cpiece = trivia[index]
450+
let length = Int(cpiece.length)
451+
let customText = fullTextBuffer.isEmpty ? emptyStringBuffer :
452+
getTextSlice(start: relativeOffset, length: length)
453+
return .init(kind: .fromRawValue(cpiece.kind), length: length, customText: customText)
454+
}
455+
371456
func write<Target>(
372457
to target: inout Target
373458
) where Target: TextOutputStream {
@@ -719,6 +804,47 @@ struct RawSyntaxBase {
719804
}
720805
}
721806

807+
fileprivate func withUnsafeTokenText<Result>(
808+
relativeOffset: Int,
809+
extraPtr: DataElementPtr,
810+
_ body: (UnsafeTokenText?) -> Result
811+
) -> Result {
812+
switch data {
813+
case .token(let data):
814+
return data.withUnsafeTokenText(relativeOffset: relativeOffset,
815+
length: byteLength, extraPtr: extraPtr, body)
816+
case .layout(_): return body(nil)
817+
}
818+
}
819+
820+
fileprivate func withUnsafeLeadingTriviaPiece<Result>(
821+
at index: Int,
822+
relativeOffset: Int,
823+
extraPtr: DataElementPtr,
824+
_ body: (UnsafeTriviaPiece?) -> Result
825+
) -> Result {
826+
switch data {
827+
case .token(let data):
828+
return data.withUnsafeLeadingTriviaPiece(at: index, relativeOffset: relativeOffset,
829+
length: byteLength, extraPtr: extraPtr, body)
830+
case .layout(_): return body(nil)
831+
}
832+
}
833+
834+
fileprivate func withUnsafeTrailingTriviaPiece<Result>(
835+
at index: Int,
836+
relativeOffset: Int,
837+
extraPtr: DataElementPtr,
838+
_ body: (UnsafeTriviaPiece?) -> Result
839+
) -> Result {
840+
switch data {
841+
case .token(let data):
842+
return data.withUnsafeTrailingTriviaPiece(at: index, relativeOffset: relativeOffset,
843+
length: byteLength, extraPtr: extraPtr, body)
844+
case .layout(_): return body(nil)
845+
}
846+
}
847+
722848
/// Prints the RawSyntax token. If self is a layout node it does nothing.
723849
fileprivate func writeToken<Target>(
724850
to target: inout Target, extraPtr: DataElementPtr
@@ -909,6 +1035,64 @@ final class RawSyntax: ManagedBuffer<RawSyntaxBase, RawSyntaxDataElement> {
9091035
}
9101036
}
9111037

1038+
/// Passes token info to the provided closure as `UnsafeTokenText`.
1039+
/// - Parameters:
1040+
/// - relativeOffset: For efficiency, the caller keeps track of the relative
1041+
/// byte offset (from start of leading trivia) of the token text, to avoid
1042+
/// calculating it within this function.
1043+
/// - body: The closure that accepts the `UnsafeTokenText` value. This value
1044+
/// must not escape the closure.
1045+
/// - Returns: Return value of `body`.
1046+
func withUnsafeTokenText<Result>(
1047+
relativeOffset: Int,
1048+
_ body: (UnsafeTokenText?) -> Result
1049+
) -> Result {
1050+
return withUnsafeMutablePointers {
1051+
$0.pointee.withUnsafeTokenText(relativeOffset: relativeOffset,
1052+
extraPtr: $1, body)
1053+
}
1054+
}
1055+
1056+
/// Passes trivia piece info to the provided closure as `UnsafeTriviaPiece`.
1057+
/// - Parameters:
1058+
/// - at: The index for the trivia piece.
1059+
/// - relativeOffset: For efficiency, the caller keeps track of the relative
1060+
/// byte offset (from start of leading trivia) of the trivia piece text,
1061+
/// to avoid calculating it within this function.
1062+
/// - body: The closure that accepts the `UnsafeTriviaPiece` value. This value
1063+
/// must not escape the closure.
1064+
/// - Returns: Return value of `body`.
1065+
func withUnsafeLeadingTriviaPiece<Result>(
1066+
at index: Int,
1067+
relativeOffset: Int,
1068+
_ body: (UnsafeTriviaPiece?) -> Result
1069+
) -> Result {
1070+
return withUnsafeMutablePointers {
1071+
$0.pointee.withUnsafeLeadingTriviaPiece(at: index,
1072+
relativeOffset: relativeOffset, extraPtr: $1, body)
1073+
}
1074+
}
1075+
1076+
/// Passes trivia piece info to the provided closure as `UnsafeTriviaPiece`.
1077+
/// - Parameters:
1078+
/// - at: The index for the trivia piece.
1079+
/// - relativeOffset: For efficiency, the caller keeps track of the relative
1080+
/// byte offset (from start of leading trivia) of the trivia piece text,
1081+
/// to avoid calculating it within this function.
1082+
/// - body: The closure that accepts the `UnsafeTriviaPiece` value. This value
1083+
/// must not escape the closure.
1084+
/// - Returns: Return value of `body`.
1085+
func withUnsafeTrailingTriviaPiece<Result>(
1086+
at index: Int,
1087+
relativeOffset: Int,
1088+
_ body: (UnsafeTriviaPiece?) -> Result
1089+
) -> Result {
1090+
return withUnsafeMutablePointers {
1091+
$0.pointee.withUnsafeTrailingTriviaPiece(at: index,
1092+
relativeOffset: relativeOffset, extraPtr: $1, body)
1093+
}
1094+
}
1095+
9121096
func formLayoutArray() -> [RawSyntax?] {
9131097
return withUnsafeMutablePointers {
9141098
$0.pointee.formLayoutArray(extraPtr: $1)
@@ -1114,3 +1298,51 @@ extension RawSyntax {
11141298
trailingTrivia: trailingTrivia, length: length, presence: presence)
11151299
}
11161300
}
1301+
1302+
/// Token info with its custom text as `UnsafeBufferPointer`. This is only safe
1303+
/// to use from within the `withUnsafeTokenText` methods.
1304+
internal struct UnsafeTokenText {
1305+
let kind: RawTokenKind
1306+
let length: Int
1307+
let customText: UnsafeBufferPointer<UInt8>
1308+
1309+
init(kind: RawTokenKind, length: Int) {
1310+
self.kind = kind
1311+
self.length = length
1312+
self.customText = .init(start: nil, count: 0)
1313+
}
1314+
1315+
init(kind: RawTokenKind, length: Int, customText: UnsafeBufferPointer<UInt8>) {
1316+
self.kind = kind
1317+
self.length = length
1318+
self.customText = customText
1319+
}
1320+
}
1321+
1322+
/// Trivia piece info with its custom text as `UnsafeBufferPointer`. This is
1323+
/// only safe to use from within the `withUnsafeLeadingTriviaPiece` and
1324+
/// `withUnsafeTrailingTriviaPiece` methods.
1325+
internal struct UnsafeTriviaPiece {
1326+
let kind: TriviaPieceKind
1327+
let length: Int
1328+
let customText: UnsafeBufferPointer<UInt8>
1329+
1330+
init(kind: TriviaPieceKind, length: Int) {
1331+
self.kind = kind
1332+
self.length = length
1333+
self.customText = .init(start: nil, count: 0)
1334+
}
1335+
1336+
init(kind: TriviaPieceKind, length: Int, customText: UnsafeBufferPointer<UInt8>) {
1337+
self.kind = kind
1338+
self.length = length
1339+
self.customText = customText
1340+
}
1341+
1342+
static func fromRawValue(
1343+
_ piece: CTriviaPiece, textBuffer: UnsafeBufferPointer<UInt8>
1344+
) -> UnsafeTriviaPiece {
1345+
return UnsafeTriviaPiece(kind: .fromRawValue(piece.kind),
1346+
length: Int(piece.length), customText: textBuffer)
1347+
}
1348+
}

Sources/SwiftSyntax/Syntax.swift

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,21 @@ extension _SyntaxBase {
250250
return TokenSequence(self)
251251
}
252252

253+
/// Sequence of `SyntaxClassifiedRange`s for this syntax node.
254+
var classifications: SyntaxClassifications {
255+
let fullRange = ByteSourceRange(offset: 0, length: byteSize)
256+
return SyntaxClassifications(self, in: fullRange)
257+
}
258+
259+
/// Sequence of `SyntaxClassifiedRange`s contained in this syntax node within
260+
/// a relative range.
261+
/// - Parameters:
262+
/// - in: The relative byte range to pull `SyntaxClassifiedRange`s from.
263+
/// - Returns: Sequence of `SyntaxClassifiedRange`s.
264+
func classifications(in range: ByteSourceRange) -> SyntaxClassifications {
265+
return SyntaxClassifications(self, in: range)
266+
}
267+
253268
/// Returns a value representing the unique identity of the node.
254269
var uniqueIdentifier: SyntaxIdentifier {
255270
return data.nodeId
@@ -464,6 +479,20 @@ extension Syntax {
464479
return base.tokens
465480
}
466481

482+
/// Sequence of `SyntaxClassifiedRange`s for this syntax node.
483+
public var classifications: SyntaxClassifications {
484+
return base.classifications
485+
}
486+
487+
/// Sequence of `SyntaxClassifiedRange`s contained in this syntax node within
488+
/// a relative range.
489+
/// - Parameters:
490+
/// - in: The relative byte range to pull `SyntaxClassifiedRange`s from.
491+
/// - Returns: Sequence of `SyntaxClassifiedRange`s.
492+
public func classifications(in range: ByteSourceRange) -> SyntaxClassifications {
493+
return base.classifications(in: range)
494+
}
495+
467496
/// Returns a value representing the unique identity of the node.
468497
public var uniqueIdentifier: SyntaxIdentifier {
469498
return base.uniqueIdentifier
@@ -503,6 +532,10 @@ public struct TokenSyntax: _SyntaxBase, Hashable {
503532
self.data = data
504533
}
505534

535+
public var presence: SourcePresence {
536+
return raw.presence
537+
}
538+
506539
/// The text of the token as written in the source code.
507540
public var text: String {
508541
return tokenKind.text

0 commit comments

Comments
 (0)