Skip to content

Commit 7d03a1e

Browse files
committed
Introduce new compiler interface
Add the SPI interface `swiftCompilerLexRegexLiteral` and `swiftCompilerParseRegexLiteral` for the Swift compiler to call into. This allows us to avoid depending on other library API on the compiler side, while letting it keep the bridging gunk. While we're here, add an extra `String` return for the parsing function that could allow us to change the regex emission format in the future. This still needs to be plumbed through on the compiler side though.
1 parent 435090d commit 7d03a1e

File tree

3 files changed

+130
-147
lines changed

3 files changed

+130
-147
lines changed
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
/// The version number of the regex literal format.
///
/// This value is emitted as an argument to the
/// `Regex(_regexString:version:)` initializer. Bump it whenever the format of
/// the regex string changes in a way that requires the runtime to be updated.
public let currentRegexLiteralFormatVersion: Int = 1
17+
18+
/// A regex-literal lexing error, as reported to the Swift compiler by
/// `swiftCompilerLexRegexLiteral`.
@_spi(CompilerInterface)
public struct CompilerLexError: Error {
  /// The diagnostic text describing what went wrong.
  public var message: String

  /// The source location at which the error should be reported.
  public var location: UnsafeRawPointer

  /// `true` for error kinds that have no good recovery behavior; `false`
  /// when the error can be recovered from.
  public var completelyErroneous: Bool
}
24+
25+
/// Interface for the Swift compiler.
///
/// Attempts to lex a regex literal out of a source buffer.
///
/// - Parameters:
///   - start: The pointer at which to start lexing the literal.
///   - bufferEnd: A pointer to the end of the buffer; lexing should never
///                proceed past it.
///   - mustBeRegex: Whether a regex literal is expected at this position. If
///                  `false`, a regex literal is only lexed when doing so
///                  produces no error.
///
/// - Returns: `nil` if no regex literal was lexed. Otherwise `resumePtr`
///            tells the caller where to resume lexing, and `error` carries
///            the lexing error to emit, if there is one.
@_spi(CompilerInterface)
public func swiftCompilerLexRegexLiteral(
  start: UnsafeRawPointer, bufferEnd: UnsafeRawPointer, mustBeRegex: Bool
) -> (resumePtr: UnsafeRawPointer, error: CompilerLexError?)? {
  do {
    let (_, _, resumePtr) = try lexRegex(start: start, end: bufferEnd)
    return (resumePtr: resumePtr, error: nil)
  } catch let lexError as DelimiterLexError {
    // When a regex is not required here the token may be something else
    // entirely, so let the client fall back.
    guard mustBeRegex else { return nil }

    let isCompletelyErroneous: Bool
    switch lexError.kind {
    case .unknownDelimiter:
      // An unknown delimiter should be recovered from, as the client may want
      // to try lexing something else.
      return nil
    case .unprintableASCII, .invalidUTF8:
      // There is currently no good recovery behavior for these.
      isCompletelyErroneous = true
    case .unterminated, .multilineClosingNotOnNewline:
      // These can be recovered from.
      isCompletelyErroneous = false
    }

    // For now every lexer error is emitted at the starting delimiter.
    return (
      resumePtr: lexError.resumePtr,
      error: CompilerLexError(
        message: "\(lexError)", location: start,
        completelyErroneous: isCompletelyErroneous
      )
    )
  } catch {
    fatalError("Should be a DelimiterLexError")
  }
}
76+
77+
/// A regex-literal parsing error, as thrown to the Swift compiler by
/// `swiftCompilerParseRegexLiteral`.
@_spi(CompilerInterface)
public struct CompilerParseError: Error {
  /// The diagnostic text describing the parse failure.
  public var message: String

  /// The position in the input string where the error starts, or `nil` when
  /// the underlying error carries no location.
  public var location: String.Index?
}
82+
83+
/// Interface for the Swift compiler.
///
/// Attempts to parse a regex literal string.
///
/// - Parameters:
///   - input: The regex input string, including delimiters.
///   - captureBufferOut: The buffer that receives the encoded captures of the
///                       regex upon a successful parse. It is asserted to
///                       hold at least `input.utf8.count` bytes.
///
/// - Returns: The string to emit along with its version number.
/// - Throws: `CompilerParseError` if there was a parsing error.
@_spi(CompilerInterface)
public func swiftCompilerParseRegexLiteral(
  _ input: String, captureBufferOut: UnsafeMutableRawBufferPointer
) throws -> (regexToEmit: String, version: Int) {
  do {
    let parsed = try parseWithDelimiters(input)

    // Serialize the capture structure for later type inference.
    // NOTE(review): `assert` is compiled out of release builds; confirm
    // whether this size check ought to be enforced there as well.
    assert(captureBufferOut.count >= input.utf8.count)
    parsed.captureStructure.encode(to: captureBufferOut)

    // The input itself is returned as the regex to emit for now. That may
    // change if syntax ever needs back-deploying to something the matching
    // engine already knows, or if the format changes otherwise. Any such
    // change also needs plumbing through on the compiler side.
    return (regexToEmit: input, version: currentRegexLiteralFormatVersion)
  } catch {
    let description = String(describing: error)
    let errorStart = (error as? LocatedErrorProtocol)?.location.start
    throw CompilerParseError(
      message: "cannot parse regular expression: \(description)",
      location: errorStart
    )
  }
}

Sources/_RegexParser/Regex/Parse/Mocking.swift

Lines changed: 0 additions & 128 deletions
This file was deleted.

Tests/RegexTests/ParseTests.swift

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12-
@testable import _RegexParser
12+
@testable @_spi(CompilerInterface) import _RegexParser
1313

1414
import XCTest
1515
@testable import _StringProcessing
@@ -281,24 +281,20 @@ func delimiterLexingDiagnosticTest(
281281
}
282282
}
283283

284-
func libswiftDiagnosticMessageTest(
285-
_ input: String, _ expectedErr: String, file: StaticString = #file,
286-
line: UInt = #line
284+
/// Checks that parsing `input` through the compiler interface fails with a
/// `CompilerParseError` whose message equals `expectedErr`.
///
/// - Parameters:
///   - input: The regex literal to parse, including delimiters.
///   - expectedErr: The exact diagnostic message expected.
///   - file: The file to attribute failures to.
///   - line: The line to attribute failures to.
func compilerInterfaceDiagnosticMessageTest(
  _ input: String, _ expectedErr: String,
  file: StaticString = #file, line: UInt = #line
) {
  do {
    // The parse is expected to fail, so an empty capture buffer is passed;
    // the buffer is only written to after a successful parse.
    let captureBuffer = UnsafeMutableRawBufferPointer(start: nil, count: 0)
    _ = try swiftCompilerParseRegexLiteral(
      input, captureBufferOut: captureBuffer)
    XCTFail("Expected parse error", file: file, line: line)
  } catch let error as CompilerParseError {
    XCTAssertEqual(expectedErr, error.message, file: file, line: line)
  } catch {
    // Report a regular test failure instead of crashing the test process, so
    // the remaining tests still run and the failure points at the call site.
    XCTFail(
      "Expected CompilerParseError, got: \(error)", file: file, line: line)
  }
}
303299

304300
extension RegexTests {
@@ -2547,8 +2543,8 @@ extension RegexTests {
25472543
delimiterLexingDiagnosticTest("#/\n#/#", .multilineClosingNotOnNewline)
25482544
}
25492545

2550-
func testlibswiftDiagnostics() {
2551-
libswiftDiagnosticMessageTest(
2546+
func testCompilerInterfaceDiagnostics() {
2547+
compilerInterfaceDiagnosticMessageTest(
25522548
"#/[x*/#", "cannot parse regular expression: expected ']'")
25532549
}
25542550
}

0 commit comments

Comments
 (0)