Skip to content

Commit 859b5d5

Browse files
authored
Merge pull request swiftlang#145 from owenv/bitstream-2
Introduce a new visitor-based API for reading LLVM bitstreams
2 parents 063dd1d + d8d46f3 commit 859b5d5

File tree

4 files changed

+354
-71
lines changed

4 files changed

+354
-71
lines changed

Sources/TSCUtility/Bits.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,15 @@ struct Bits: RandomAccessCollection {
8484
return buffer.buffer.dropFirst(offset >> 3).prefix((newOffset - offset) >> 3)
8585
}
8686

87+
/// Advances the cursor past `count` whole bytes without reading them.
///
/// - Parameter count: Number of bytes to skip; must be non-negative.
/// - Precondition: The cursor is currently positioned on a byte boundary.
/// - Throws: `Error.bufferOverflow` if the resulting offset would exceed
///   `buffer.count`, or if it cannot be represented in an `Int` at all.
mutating func skip(bytes count: Int) throws {
    precondition(count >= 0)
    precondition(offset & 0b111 == 0)
    // `count << 3` silently discards high bits, so a huge count could wrap
    // around to a small, valid-looking offset. Use overflow-reporting
    // arithmetic and treat any overflow as running off the end of the buffer.
    let (bitCount, multiplyOverflowed) = count.multipliedReportingOverflow(by: 8)
    let (newOffset, addOverflowed) = offset.addingReportingOverflow(bitCount)
    if multiplyOverflowed || addOverflowed || newOffset > buffer.count {
        throw Error.bufferOverflow
    }
    offset = newOffset
}
95+
8796
mutating func advance(toBitAlignment align: Int) throws {
8897
precondition(align > 0)
8998
precondition(offset &+ (align&-1) >= offset)

Sources/TSCUtility/Bitstream.swift

Lines changed: 102 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,43 +10,43 @@
1010

1111
import Foundation
1212

13-
/// A single element of a parsed bitstream: either a nested block or a record.
public enum BitcodeElement {
    case block(Block)
    case record(Record)
}

extension BitcodeElement {
    /// A block, identified by `id`, together with the elements it contains.
    public struct Block {
        public var id: UInt64
        public var elements: [BitcodeElement]
    }

    /// A record, identified by `id`, with its operand fields and payload.
    public struct Record {
        /// The trailing payload of a record; `.none` when there is no payload.
        public enum Payload {
            case none
            case array([UInt64])
            case char6String(String)
            case blob(Data)
        }

        public var id: UInt64
        public var fields: [UInt64]
        public var payload: Payload
    }
}
3535

36-
/// Names recorded for a block: the block's own name plus a table of record
/// names keyed by a `UInt64` (presumably the record ID — confirm against the
/// reader that fills it in).
public struct BlockInfo {
    public var name = ""
    public var recordNames = [UInt64: String]()
}
4040

4141
extension Bitcode {
42-
struct Signature: Equatable {
42+
public struct Signature: Equatable {
4343
private var value: UInt32
4444

45-
init(value: UInt32) {
45+
public init(value: UInt32) {
4646
self.value = value
4747
}
4848

49-
init(string: String) {
49+
public init(string: String) {
5050
precondition(string.utf8.count == 4)
5151
var result: UInt32 = 0
5252
for byte in string.utf8.reversed() {
@@ -58,10 +58,12 @@ extension Bitcode {
5858
}
5959
}
6060

61-
/// The parsed contents of a file stored in the
/// [LLVM bitstream container format](https://llvm.org/docs/BitCodeFormat.html#bitstream-container-format).
public struct Bitcode {
    /// The signature ("magic number") read from the first 32 bits of the data.
    public let signature: Signature

    /// The elements found at the top level of the stream.
    public let elements: [BitcodeElement]

    /// Block information gathered by the reader while parsing.
    public let blockInfo: [UInt64: BlockInfo]
}
6668

6769
private extension Bits.Cursor {
@@ -297,22 +299,22 @@ private struct BitstreamReader {
297299
}
298300
}
299301

300-
mutating func readBlock(id: UInt64, abbrevWidth: Int, abbrevInfo: [Abbrev]) throws -> [BitcodeElement] {
302+
mutating func readBlock<Visitor: BitstreamVisitor>(id: UInt64, abbrevWidth: Int, abbrevInfo: [Abbrev], visitor: inout Visitor) throws {
301303
var abbrevInfo = abbrevInfo
302-
var elements = [BitcodeElement]()
303304

304305
while !cursor.isAtEnd {
305306
switch try cursor.read(abbrevWidth) {
306307
case 0: // END_BLOCK
307308
try cursor.advance(toBitAlignment: 32)
308309
// FIXME: check expected length
309-
return elements
310+
try visitor.didExitBlock()
311+
return
310312

311313
case 1: // ENTER_SUBBLOCK
312314
let blockID = try cursor.readVBR(8)
313315
let newAbbrevWidth = Int(try cursor.readVBR(4))
314316
try cursor.advance(toBitAlignment: 32)
315-
_ = try cursor.read(32) // FIXME: use expected length
317+
let blockLength = try cursor.read(32) * 4
316318

317319
switch blockID {
318320
case 0:
@@ -321,9 +323,13 @@ private struct BitstreamReader {
321323
// Metadata blocks we don't understand yet
322324
fallthrough
323325
default:
324-
let innerElements = try readBlock(
325-
id: blockID, abbrevWidth: newAbbrevWidth, abbrevInfo: globalAbbrevs[blockID] ?? [])
326-
elements.append(.block(.init(id: blockID, elements: innerElements)))
326+
guard try visitor.shouldEnterBlock(id: blockID) else {
327+
try cursor.skip(bytes: Int(blockLength))
328+
break
329+
}
330+
try readBlock(
331+
id: blockID, abbrevWidth: newAbbrevWidth,
332+
abbrevInfo: globalAbbrevs[blockID] ?? [], visitor: &visitor)
327333
}
328334

329335
case 2: // DEFINE_ABBREV
@@ -337,33 +343,97 @@ private struct BitstreamReader {
337343
for _ in 0..<numOps {
338344
operands.append(try cursor.readVBR(6))
339345
}
340-
elements.append(.record(.init(id: code, fields: operands, payload: .none)))
346+
try visitor.visit(record: .init(id: code, fields: operands, payload: .none))
341347

342348
case let abbrevID:
343349
guard Int(abbrevID) - 4 < abbrevInfo.count else {
344350
throw Error.noSuchAbbrev(blockID: id, abbrevID: Int(abbrevID))
345351
}
346-
elements.append(.record(try readAbbreviatedRecord(abbrevInfo[Int(abbrevID) - 4])))
352+
try visitor.visit(record: try readAbbreviatedRecord(abbrevInfo[Int(abbrevID) - 4]))
347353
}
348354
}
349355

350356
guard id == Self.fakeTopLevelBlockID else {
351357
throw Error.missingEndBlock(blockID: id)
352358
}
353-
return elements
354359
}
355360

356361
static let fakeTopLevelBlockID: UInt64 = ~0
357362
}
358363

364+
/// Receives callbacks from the reader as it walks a bitstream.
public protocol BitstreamVisitor {
    /// Checks the stream's signature (its "magic number").
    ///
    /// - Throws: Any error, to reject a stream with an unexpected signature.
    func validate(signature: Bitcode.Signature) throws

    /// Invoked each time a new block is encountered.
    ///
    /// - Returns: `true` to descend into the block and read its contents,
    ///   `false` to skip over it.
    mutating func shouldEnterBlock(id: UInt64) throws -> Bool

    /// Invoked when the end of a block is reached.
    mutating func didExitBlock() throws

    /// Invoked for each record that is encountered.
    mutating func visit(record: BitcodeElement.Record) throws
}
376+
377+
/// A basic visitor that collects all the blocks and records in a stream
/// into a tree of `BitcodeElement`s.
private struct CollectingVisitor: BitstreamVisitor {
    /// Thrown when `didExitBlock()` is called with no entered block to close,
    /// e.g. because the stream contains an END_BLOCK at the top level.
    struct UnbalancedBlockExit: Swift.Error {}

    /// One entry per block currently being read: its ID and the elements
    /// collected for it so far. The bottom entry is the synthetic top-level
    /// block and is never popped.
    var stack: [(UInt64, [BitcodeElement])] = [(BitstreamReader.fakeTopLevelBlockID, [])]

    /// Accepts every signature; this visitor does no validation.
    func validate(signature: Bitcode.Signature) throws {}

    /// Opens a fresh element list for the block and always enters it.
    mutating func shouldEnterBlock(id: UInt64) throws -> Bool {
        stack.append((id, []))
        return true
    }

    /// Closes the innermost block and appends it to its parent's elements.
    ///
    /// - Throws: `UnbalancedBlockExit` if only the top-level entry remains.
    ///   Popping it would leave no parent to append to, and indexing the
    ///   then-empty stack would trap on malformed input.
    mutating func didExitBlock() throws {
        guard stack.count > 1, let (id, elements) = stack.popLast() else {
            throw UnbalancedBlockExit()
        }

        let block = BitcodeElement.Block(id: id, elements: elements)
        stack[stack.endIndex - 1].1.append(.block(block))
    }

    /// Appends the record to the innermost open block.
    mutating func visit(record: BitcodeElement.Record) throws {
        stack[stack.endIndex - 1].1.append(.record(record))
    }

    /// Returns the elements collected at the top level. Every entered block
    /// is expected to have been exited by the time this is called.
    func finalizeTopLevelElements() -> [BitcodeElement] {
        assert(stack.count == 1)
        return stack[0].1
    }
}
406+
359407
extension Bitcode {
    /// Parse a bitstream from data, collecting every block and record into
    /// `elements`.
    ///
    /// - Parameter data: The signature followed by the bitstream; must be
    ///   longer than 4 bytes.
    /// - Throws: Any error raised while reading the stream.
    public init(data: Data) throws {
        precondition(data.count > 4)
        let signatureValue = UInt32(Bits(buffer: data).readBits(atOffset: 0, count: 32))
        // A `Data` slice keeps its parent's indices, so `data[4..<data.count]`
        // is only correct when `startIndex == 0`. Drop the 4 signature bytes
        // by count instead.
        let bitstreamData = data.dropFirst(4)

        var reader = BitstreamReader(buffer: bitstreamData)
        var visitor = CollectingVisitor()
        try reader.readBlock(id: BitstreamReader.fakeTopLevelBlockID,
                             abbrevWidth: 2,
                             abbrevInfo: [],
                             visitor: &visitor)
        self.init(signature: .init(value: signatureValue),
                  elements: visitor.finalizeTopLevelElements(),
                  blockInfo: reader.blockInfo)
    }

    /// Traverse a bitstream using the specified `visitor`, which will receive
    /// callbacks when blocks and records are encountered.
    ///
    /// - Parameters:
    ///   - data: The signature followed by the bitstream; must be longer
    ///     than 4 bytes.
    ///   - visitor: Asked to validate the signature first, then notified of
    ///     blocks and records as they are read.
    /// - Throws: Any error thrown by the visitor or raised while reading.
    public static func read<Visitor: BitstreamVisitor>(stream data: Data, using visitor: inout Visitor) throws {
        precondition(data.count > 4)
        let signatureValue = UInt32(Bits(buffer: data).readBits(atOffset: 0, count: 32))
        try visitor.validate(signature: .init(value: signatureValue))

        // Slice-safe equivalent of `data[4..<data.count]`; see `init(data:)`.
        let bitstreamData = data.dropFirst(4)
        var reader = BitstreamReader(buffer: bitstreamData)
        try reader.readBlock(id: BitstreamReader.fakeTopLevelBlockID,
                             abbrevWidth: 2,
                             abbrevInfo: [],
                             visitor: &visitor)
    }
}

Sources/TSCUtility/SerializedDiagnostics.swift

Lines changed: 57 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -43,40 +43,11 @@ public struct SerializedDiagnostics {
4343
public var diagnostics: [Diagnostic]
4444

4545
/// Decodes serialized diagnostics from `data`.
///
/// - Throws: `Error.noMetadataBlock` if no version number was read from the
///   stream, plus any error raised while reading it.
public init(data: Data) throws {
    var visitor = Reader()
    try Bitcode.read(stream: data, using: &visitor)

    guard let parsedVersion = visitor.versionNumber else {
        throw Error.noMetadataBlock
    }

    versionNumber = parsedVersion
    diagnostics = visitor.diagnostics
}
8152
}
8253

@@ -101,7 +72,7 @@ extension SerializedDiagnostics {
10172
/// Fix-its associated with the diagnostic.
10273
public var fixIts: [FixIt]
10374

104-
fileprivate init(block: BitcodeElement.Block,
75+
fileprivate init(records: [BitcodeElement.Record],
10576
filenameMap: inout [UInt64: String],
10677
flagMap: inout [UInt64: String],
10778
categoryMap: inout [UInt64: String]) throws {
@@ -113,11 +84,7 @@ extension SerializedDiagnostics {
11384
var ranges: [(SourceLocation, SourceLocation)] = []
11485
var fixIts: [FixIt] = []
11586

116-
for element in block.elements {
117-
guard case let .record(record) = element else {
118-
throw Error.unexpectedSubblock
119-
}
120-
87+
for record in records {
12188
switch SerializedDiagnostics.RecordID(rawValue: record.id) {
12289
case .diagnosticInfo:
12390
guard record.fields.count == 8,
@@ -230,3 +197,54 @@ extension SerializedDiagnostics {
230197
public var text: String
231198
}
232199
}
200+
201+
extension SerializedDiagnostics {
    /// A bitstream visitor that extracts the version number and the
    /// diagnostics from a serialized diagnostics stream.
    private struct Reader: BitstreamVisitor {
        /// The block currently being read, or `nil` at the top level.
        var currentBlockID: BlockID? = nil

        var diagnostics: [Diagnostic] = []
        var versionNumber: Int? = nil
        var filenameMap = [UInt64: String]()
        var flagMap = [UInt64: String]()
        var categoryMap = [UInt64: String]()

        /// Records seen so far inside the diagnostic block being read.
        var currentDiagnosticRecords: [BitcodeElement.Record] = []

        /// - Throws: `Error.badMagic` unless the signature is "DIAG".
        func validate(signature: Bitcode.Signature) throws {
            guard signature == .init(string: "DIAG") else { throw Error.badMagic }
        }

        /// Enters every known block found at the top level.
        ///
        /// - Throws: `Error.unknownBlock` for an unrecognized block ID, or
        ///   `Error.unexpectedSubblock` when a block is already being read.
        mutating func shouldEnterBlock(id: UInt64) throws -> Bool {
            guard let blockID = BlockID(rawValue: id) else { throw Error.unknownBlock }
            guard currentBlockID == nil else { throw Error.unexpectedSubblock }
            currentBlockID = blockID
            return true
        }

        /// Builds a `Diagnostic` from the buffered records when a diagnostic
        /// block ends, then returns to the top level.
        mutating func didExitBlock() throws {
            if currentBlockID == .diagnostic {
                diagnostics.append(try Diagnostic(records: currentDiagnosticRecords,
                                                  filenameMap: &filenameMap,
                                                  flagMap: &flagMap,
                                                  categoryMap: &categoryMap))
                currentDiagnosticRecords = []
            }
            currentBlockID = nil
        }

        /// Handles a record according to the block it appears in.
        ///
        /// - Throws: `Error.malformedRecord` for a metadata record that is not
        ///   a single-field version record whose value fits in `Int`, or
        ///   `Error.unexpectedTopLevelRecord` for a record outside any block.
        mutating func visit(record: BitcodeElement.Record) throws {
            switch currentBlockID {
            case .metadata:
                // `Int(_:)` traps on a `UInt64` above `Int.max`; the value
                // comes from the file, so reject it as malformed instead.
                guard record.id == RecordID.version.rawValue,
                      record.fields.count == 1,
                      let version = Int(exactly: record.fields[0]) else {
                    throw Error.malformedRecord
                }
                versionNumber = version
            case .diagnostic:
                currentDiagnosticRecords.append(record)
            case nil:
                throw Error.unexpectedTopLevelRecord
            }
        }
    }
}

0 commit comments

Comments
 (0)