Skip to content

Make sure we convert the source to a native UTF-8 string before parsing #251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Sources/SwiftSyntax/SyntaxParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public enum SyntaxParser {
// If the string is backed by an NSString then such indexing will become
// extremely slow.
var utf8Source = source
utf8Source.makeNativeUTF8IfNeeded()
utf8Source.makeContiguousUTF8()

let rawSyntax = parseRaw(utf8Source, parseTransition, filenameForDiagnostics,
diagnosticEngine)
Expand Down Expand Up @@ -127,7 +127,7 @@ public enum SyntaxParser {
_ filenameForDiagnostics: String,
_ diagnosticEngine: DiagnosticEngine?
) -> RawSyntax {
assert(source.isNativeUTF8)
precondition(source.isContiguousUTF8)
let c_parser = swiftparse_parser_create()
defer {
swiftparse_parser_dispose(c_parser)
Expand Down
2 changes: 1 addition & 1 deletion Sources/SwiftSyntax/TokenKind.swift.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ extension TokenKind {
return body(.init(kind: .${token.swift_kind()}, length: length))
% else:
case .${token.swift_kind()}(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .${token.swift_kind()}, length: length, customText: buf))
Expand Down
2 changes: 1 addition & 1 deletion Sources/SwiftSyntax/Trivia.swift.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ extension TriviaPiece {
return body(.init(kind: .${trivia.lower_name}s, length: length))
% else:
case var .${trivia.lower_name}(text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .${trivia.lower_name}, length: length, customText: buf))
Expand Down
20 changes: 0 additions & 20 deletions Sources/SwiftSyntax/Utils.swift
Original file line number Diff line number Diff line change
Expand Up @@ -73,26 +73,6 @@ extension String {
/// Creates a `String` by decoding the bytes in `textBuffer` as UTF-8.
///
/// - Parameter textBuffer: The raw bytes to decode.
/// - Returns: The string produced by decoding `textBuffer`.
static func fromBuffer(_ textBuffer: UnsafeBufferPointer<UInt8>) -> String {
return String(decoding: textBuffer, as: UTF8.self)
}

/// Whether the string's UTF-8 view can provide contiguous storage.
///
/// The closure's return value (`0`) is only a placeholder; the result depends
/// solely on whether `withContiguousStorageIfAvailable` returned `nil`.
var isNativeUTF8: Bool {
return utf8.withContiguousStorageIfAvailable { _ in 0 } != nil
}

/// Appends an empty string to `self` when `isNativeUTF8` is false; does
/// nothing otherwise.
///
/// NOTE(review): the empty append is presumably meant to force the contents
/// into native UTF-8 storage — confirm that it actually does so for strings
/// bridged from `NSString`.
mutating func makeNativeUTF8IfNeeded() {
if !isNativeUTF8 {
self += ""
}
}

/// Returns the part of the string covered by a range of its UTF-8 view.
///
/// - Parameters:
///   - offset: Distance, in positions of the UTF-8 view, from the view's
///     start index to the beginning of the slice.
///   - length: Number of UTF-8 view positions the slice spans.
/// - Returns: A `Substring` built from `utf8[begin..<end]`, or an empty
///   `Substring` when `length` is zero.
func utf8Slice(offset: Int, length: Int) -> Substring {
// A zero-length request returns early; `offset` is not examined in that case.
if length == 0 {
return Substring()
}
let utf8 = self.utf8
let begin = utf8.index(utf8.startIndex, offsetBy: offset)
let end = utf8.index(begin, offsetBy: length)
return Substring(utf8[begin..<end])
}
}

extension UnsafeBufferPointer where Element == UInt8 {
Expand Down
26 changes: 13 additions & 13 deletions Sources/SwiftSyntax/gyb_generated/TokenKind.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1771,79 +1771,79 @@ extension TokenKind {
let length = 13
return body(.init(kind: .poundColorLiteralKeyword, length: length))
case .integerLiteral(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .integerLiteral, length: length, customText: buf))
})!
case .floatingLiteral(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .floatingLiteral, length: length, customText: buf))
})!
case .stringLiteral(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .stringLiteral, length: length, customText: buf))
})!
case .unknown(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .unknown, length: length, customText: buf))
})!
case .identifier(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .identifier, length: length, customText: buf))
})!
case .unspacedBinaryOperator(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .unspacedBinaryOperator, length: length, customText: buf))
})!
case .spacedBinaryOperator(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .spacedBinaryOperator, length: length, customText: buf))
})!
case .postfixOperator(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .postfixOperator, length: length, customText: buf))
})!
case .prefixOperator(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .prefixOperator, length: length, customText: buf))
})!
case .dollarIdentifier(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .dollarIdentifier, length: length, customText: buf))
})!
case .contextualKeyword(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .contextualKeyword, length: length, customText: buf))
})!
case .rawStringDelimiter(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .rawStringDelimiter, length: length, customText: buf))
})!
case .stringSegment(var text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .stringSegment, length: length, customText: buf))
Expand Down
10 changes: 5 additions & 5 deletions Sources/SwiftSyntax/gyb_generated/Trivia.swift
Original file line number Diff line number Diff line change
Expand Up @@ -366,31 +366,31 @@ extension TriviaPiece {
let length = count * 2
return body(.init(kind: .carriageReturnLineFeeds, length: length))
case var .lineComment(text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .lineComment, length: length, customText: buf))
})!
case var .blockComment(text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .blockComment, length: length, customText: buf))
})!
case var .docLineComment(text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .docLineComment, length: length, customText: buf))
})!
case var .docBlockComment(text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .docBlockComment, length: length, customText: buf))
})!
case var .garbageText(text):
text.makeNativeUTF8IfNeeded()
text.makeContiguousUTF8()
let length = text.utf8.count
return text.utf8.withContiguousStorageIfAvailable({ (buf: UnsafeBufferPointer<UInt8>) in
return body(.init(kind: .garbageText, length: length, customText: buf))
Expand Down
7 changes: 7 additions & 0 deletions Tests/SwiftSyntaxTest/SyntaxTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,11 @@ public class SyntaxTests: XCTestCase {
XCTAssertEqual(Syntax(integerExpr), Syntax(fromProtocol: integerExpr as SyntaxProtocol))
XCTAssertEqual(Syntax(integerExpr), Syntax(fromProtocol: integerExpr as ExprSyntaxProtocol))
}

/// Runs the parser on a `String` obtained by bridging from `NSString`.
///
/// The test makes no XCTest assertions: the result of `parse` is discarded and
/// `try?` ignores any thrown error, so it can only fail if the call traps
/// (for example on an internal assertion or precondition).
public func testRunParserOnForeignString() {
// Store the source code in a foreign non-UTF-8 string.
// If SwiftSyntax fails to convert it to a native UTF-8 string, internal assertions should fail.
let sourceNsString = "var 🎉 = 2" as NSString
_ = try? SyntaxParser.parse(source: sourceNsString as String)
}
}