Skip to content

Commit ce90205

Browse files
authored
Merge pull request #251 from ahoppen/utf8-string
Make sure we convert the source to a native UTF-8 string before parsing
2 parents 35eba3f + efb6be3 commit ce90205

File tree

7 files changed

+29
-42
lines changed

7 files changed

+29
-42
lines changed

Sources/SwiftSyntax/SyntaxParser.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ public enum SyntaxParser {
8989
// If the string is backed by an NSString then such indexing will become
9090
// extremely slow.
9191
var utf8Source = source
92-
utf8Source.makeNativeUTF8IfNeeded()
92+
utf8Source.makeContiguousUTF8()
9393

9494
let rawSyntax = parseRaw(utf8Source, parseTransition, filenameForDiagnostics,
9595
diagnosticEngine)
@@ -127,7 +127,7 @@ public enum SyntaxParser {
127127
_ filenameForDiagnostics: String,
128128
_ diagnosticEngine: DiagnosticEngine?
129129
) -> RawSyntax {
130-
assert(source.isNativeUTF8)
130+
precondition(source.isContiguousUTF8)
131131
let c_parser = swiftparse_parser_create()
132132
defer {
133133
swiftparse_parser_dispose(c_parser)

Sources/SwiftSyntax/TokenKind.swift.gyb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -172,7 +172,7 @@ extension TokenKind {
172172
return body(.init(kind: .${token.swift_kind()}, length: length))
173173
% else:
174174
case .${token.swift_kind()}(var text):
175-
text.makeNativeUTF8IfNeeded()
175+
text.makeContiguousUTF8()
176176
let length = text.utf8.count
177177
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
178178
return body(.init(kind: .${token.swift_kind()}, length: length, customText: buf))

Sources/SwiftSyntax/Trivia.swift.gyb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,7 @@ extension TriviaPiece {
236236
return body(.init(kind: .${trivia.lower_name}s, length: length))
237237
% else:
238238
case var .${trivia.lower_name}(text):
239-
text.makeNativeUTF8IfNeeded()
239+
text.makeContiguousUTF8()
240240
let length = text.utf8.count
241241
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
242242
return body(.init(kind: .${trivia.lower_name}, length: length, customText: buf))

Sources/SwiftSyntax/Utils.swift

Lines changed: 0 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -73,26 +73,6 @@ extension String {
7373
static func fromBuffer(_ textBuffer: UnsafeBufferPointer<UInt8>) -> String {
7474
return String(decoding: textBuffer, as: UTF8.self)
7575
}
76-
77-
var isNativeUTF8: Bool {
78-
return utf8.withContiguousStorageIfAvailable { _ in 0 } != nil
79-
}
80-
81-
mutating func makeNativeUTF8IfNeeded() {
82-
if !isNativeUTF8 {
83-
self += ""
84-
}
85-
}
86-
87-
func utf8Slice(offset: Int, length: Int) -> Substring {
88-
if length == 0 {
89-
return Substring()
90-
}
91-
let utf8 = self.utf8
92-
let begin = utf8.index(utf8.startIndex, offsetBy: offset)
93-
let end = utf8.index(begin, offsetBy: length)
94-
return Substring(utf8[begin..<end])
95-
}
9676
}
9777

9878
extension UnsafeBufferPointer where Element == UInt8 {

Sources/SwiftSyntax/gyb_generated/TokenKind.swift

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1771,79 +1771,79 @@ extension TokenKind {
17711771
let length = 13
17721772
return body(.init(kind: .poundColorLiteralKeyword, length: length))
17731773
case .integerLiteral(var text):
1774-
text.makeNativeUTF8IfNeeded()
1774+
text.makeContiguousUTF8()
17751775
let length = text.utf8.count
17761776
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
17771777
return body(.init(kind: .integerLiteral, length: length, customText: buf))
17781778
})!
17791779
case .floatingLiteral(var text):
1780-
text.makeNativeUTF8IfNeeded()
1780+
text.makeContiguousUTF8()
17811781
let length = text.utf8.count
17821782
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
17831783
return body(.init(kind: .floatingLiteral, length: length, customText: buf))
17841784
})!
17851785
case .stringLiteral(var text):
1786-
text.makeNativeUTF8IfNeeded()
1786+
text.makeContiguousUTF8()
17871787
let length = text.utf8.count
17881788
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
17891789
return body(.init(kind: .stringLiteral, length: length, customText: buf))
17901790
})!
17911791
case .unknown(var text):
1792-
text.makeNativeUTF8IfNeeded()
1792+
text.makeContiguousUTF8()
17931793
let length = text.utf8.count
17941794
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
17951795
return body(.init(kind: .unknown, length: length, customText: buf))
17961796
})!
17971797
case .identifier(var text):
1798-
text.makeNativeUTF8IfNeeded()
1798+
text.makeContiguousUTF8()
17991799
let length = text.utf8.count
18001800
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18011801
return body(.init(kind: .identifier, length: length, customText: buf))
18021802
})!
18031803
case .unspacedBinaryOperator(var text):
1804-
text.makeNativeUTF8IfNeeded()
1804+
text.makeContiguousUTF8()
18051805
let length = text.utf8.count
18061806
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18071807
return body(.init(kind: .unspacedBinaryOperator, length: length, customText: buf))
18081808
})!
18091809
case .spacedBinaryOperator(var text):
1810-
text.makeNativeUTF8IfNeeded()
1810+
text.makeContiguousUTF8()
18111811
let length = text.utf8.count
18121812
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18131813
return body(.init(kind: .spacedBinaryOperator, length: length, customText: buf))
18141814
})!
18151815
case .postfixOperator(var text):
1816-
text.makeNativeUTF8IfNeeded()
1816+
text.makeContiguousUTF8()
18171817
let length = text.utf8.count
18181818
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18191819
return body(.init(kind: .postfixOperator, length: length, customText: buf))
18201820
})!
18211821
case .prefixOperator(var text):
1822-
text.makeNativeUTF8IfNeeded()
1822+
text.makeContiguousUTF8()
18231823
let length = text.utf8.count
18241824
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18251825
return body(.init(kind: .prefixOperator, length: length, customText: buf))
18261826
})!
18271827
case .dollarIdentifier(var text):
1828-
text.makeNativeUTF8IfNeeded()
1828+
text.makeContiguousUTF8()
18291829
let length = text.utf8.count
18301830
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18311831
return body(.init(kind: .dollarIdentifier, length: length, customText: buf))
18321832
})!
18331833
case .contextualKeyword(var text):
1834-
text.makeNativeUTF8IfNeeded()
1834+
text.makeContiguousUTF8()
18351835
let length = text.utf8.count
18361836
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18371837
return body(.init(kind: .contextualKeyword, length: length, customText: buf))
18381838
})!
18391839
case .rawStringDelimiter(var text):
1840-
text.makeNativeUTF8IfNeeded()
1840+
text.makeContiguousUTF8()
18411841
let length = text.utf8.count
18421842
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18431843
return body(.init(kind: .rawStringDelimiter, length: length, customText: buf))
18441844
})!
18451845
case .stringSegment(var text):
1846-
text.makeNativeUTF8IfNeeded()
1846+
text.makeContiguousUTF8()
18471847
let length = text.utf8.count
18481848
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
18491849
return body(.init(kind: .stringSegment, length: length, customText: buf))

Sources/SwiftSyntax/gyb_generated/Trivia.swift

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -366,31 +366,31 @@ extension TriviaPiece {
366366
let length = count * 2
367367
return body(.init(kind: .carriageReturnLineFeeds, length: length))
368368
case var .lineComment(text):
369-
text.makeNativeUTF8IfNeeded()
369+
text.makeContiguousUTF8()
370370
let length = text.utf8.count
371371
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
372372
return body(.init(kind: .lineComment, length: length, customText: buf))
373373
})!
374374
case var .blockComment(text):
375-
text.makeNativeUTF8IfNeeded()
375+
text.makeContiguousUTF8()
376376
let length = text.utf8.count
377377
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
378378
return body(.init(kind: .blockComment, length: length, customText: buf))
379379
})!
380380
case var .docLineComment(text):
381-
text.makeNativeUTF8IfNeeded()
381+
text.makeContiguousUTF8()
382382
let length = text.utf8.count
383383
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
384384
return body(.init(kind: .docLineComment, length: length, customText: buf))
385385
})!
386386
case var .docBlockComment(text):
387-
text.makeNativeUTF8IfNeeded()
387+
text.makeContiguousUTF8()
388388
let length = text.utf8.count
389389
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
390390
return body(.init(kind: .docBlockComment, length: length, customText: buf))
391391
})!
392392
case var .garbageText(text):
393-
text.makeNativeUTF8IfNeeded()
393+
text.makeContiguousUTF8()
394394
let length = text.utf8.count
395395
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
396396
return body(.init(kind: .garbageText, length: length, customText: buf))

Tests/SwiftSyntaxTest/SyntaxTests.swift

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,4 +144,11 @@ public class SyntaxTests: XCTestCase {
144144
XCTAssertEqual(Syntax(integerExpr), Syntax(fromProtocol: integerExpr as SyntaxProtocol))
145145
XCTAssertEqual(Syntax(integerExpr), Syntax(fromProtocol: integerExpr as ExprSyntaxProtocol))
146146
}
147+
148+
public func testRunParserOnForeignString() {
149+
// Store the source code in a foreign non-UTF-8 string.
150+
// If SwiftSyntax fails to convert it to a native UTF-8 string, internal assertions should fail.
151+
let sourceNsString = "var 🎉 = 2" as NSString
152+
_ = try? SyntaxParser.parse(source: sourceNsString as String)
153+
}
147154
}

0 commit comments

Comments
 (0)