Skip to content

RegexBuilder module #227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,22 @@ let package = Package(
.target(
name: "_StringProcessing",
dependencies: ["_MatchingEngine", "_CUnicode"],
swiftSettings: [
.unsafeFlags(["-enable-library-evolution"]),
]),
.target(
name: "RegexBuilder",
dependencies: ["_StringProcessing", "_MatchingEngine"],
swiftSettings: [
.unsafeFlags(["-enable-library-evolution"]),
.unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"])
]),
.testTarget(
name: "RegexTests",
dependencies: ["_StringProcessing"],
dependencies: ["_StringProcessing"]),
.testTarget(
name: "RegexBuilderTests",
dependencies: ["_StringProcessing", "RegexBuilder"],
swiftSettings: [
.unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"])
]),
Expand All @@ -73,7 +82,7 @@ let package = Package(
// MARK: Exercises
.target(
name: "Exercises",
dependencies: ["_MatchingEngine", "Prototypes", "_StringProcessing"],
dependencies: ["_MatchingEngine", "Prototypes", "_StringProcessing", "RegexBuilder"],
swiftSettings: [
.unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"])
]),
Expand Down
1 change: 1 addition & 0 deletions Sources/Exercises/Participants/RegexParticipant.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//

import _StringProcessing
import RegexBuilder

/*

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//

import _MatchingEngine
@_spi(RegexBuilder) import _StringProcessing

public struct Anchor {
internal enum Kind {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
//
//===----------------------------------------------------------------------===//

@_spi(RegexBuilder) import _StringProcessing

@resultBuilder
public enum RegexComponentBuilder {
public static func buildBlock() -> Regex<Substring> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@
//===----------------------------------------------------------------------===//

import _MatchingEngine
@_spi(RegexBuilder) import _StringProcessing

extension Regex {
  /// Creates a regex from the component produced by a builder closure.
  ///
  /// - Parameter content: A `RegexComponentBuilder` closure whose resulting
  ///   component has the same `Output` as this regex.
  public init<Content: RegexComponent>(
    @RegexComponentBuilder _ content: () -> Content
  ) where Content.Output == Output {
    // Evaluate the builder once, then delegate to the component initializer.
    let component = content()
    self.init(component)
  }
}

// A convenience protocol for builtin regex components that are initialized with
// a `DSLTree` node.
Expand All @@ -23,51 +32,6 @@ extension _BuiltinRegexComponent {
}
}

// MARK: - Primitives

extension String: RegexComponent {
  public typealias Output = Substring

  /// A regex built from a quoted-literal node wrapping this string.
  public var regex: Regex<Output> {
    return Regex<Output>(node: .quotedLiteral(self))
  }
}

extension Substring: RegexComponent {
  public typealias Output = Substring

  /// A regex built from a quoted-literal node wrapping this substring's
  /// contents, copied into a `String`.
  public var regex: Regex<Output> {
    let literal = String(self)
    return Regex<Output>(node: .quotedLiteral(literal))
  }
}

extension Character: RegexComponent {
  public typealias Output = Substring

  /// A regex built from a `.char` atom node wrapping this character.
  public var regex: Regex<Output> {
    return Regex<Output>(node: .atom(.char(self)))
  }
}

extension UnicodeScalar: RegexComponent {
  public typealias Output = Substring

  /// A regex built from a `.scalar` atom node wrapping this Unicode scalar.
  public var regex: Regex<Output> {
    return Regex<Output>(node: .atom(.scalar(self)))
  }
}

extension CharacterClass: RegexComponent {
  public typealias Output = Substring

  /// A regex built from the AST returned by `makeAST()`.
  ///
  /// Traps with a fatal error when `makeAST()` returns `nil`.
  public var regex: Regex<Output> {
    if let ast = makeAST() {
      return Regex(ast: ast)
    }
    fatalError("FIXME: extended AST?")
  }
}

// MARK: - Combinators

// MARK: Concatenation
Expand Down Expand Up @@ -96,9 +60,9 @@ public struct QuantificationBehavior {
case reluctantly
case possessively
}

var kind: Kind

internal var astKind: AST.Quantification.Kind {
switch kind {
case .eagerly: return .eager
Expand All @@ -108,19 +72,49 @@ public struct QuantificationBehavior {
}
}

extension DSLTree.Node {
  /// Generates a DSLTree node for a repeated range of the given DSLTree node.
  /// Individual public API functions are in the generated Variadics.swift file.
  ///
  /// - Parameters:
  ///   - range: The half-open range of permitted repetition counts. An upper
  ///     bound of `Int.max` is treated as "unbounded".
  ///   - behavior: The quantification behavior; its `astKind` is forwarded to
  ///     the resulting node, except for exact counts (see below).
  ///   - node: The node to repeat.
  /// - Returns: A `.quantification` node wrapping `node`.
  static func repeating(
    _ range: Range<Int>,
    _ behavior: QuantificationBehavior,
    _ node: DSLTree.Node
  ) -> DSLTree.Node {
    // TODO: Throw these as errors
    assert(range.lowerBound >= 0, "Cannot specify a negative lower bound")
    assert(!range.isEmpty, "Cannot specify an empty range")

    // `Range` is half-open, so the largest permitted repetition count is
    // `upperBound - 1` (this is already how the `count == 1` case below reads
    // the range). The bounded cases must therefore pass `upperBound - 1`.
    // NOTE(review): assumes `.upToN(n)` and `.range(n, m)` take an inclusive
    // maximum count, like the regex syntax `{,n}` / `{n,m}` — confirm against
    // the quantification amount definition.
    switch (range.lowerBound, range.upperBound) {
    case (0, Int.max): // 0...
      return .quantification(.zeroOrMore, behavior.astKind, node)
    case (1, Int.max): // 1...
      return .quantification(.oneOrMore, behavior.astKind, node)
    case _ where range.count == 1: // ..<1 or ...0 or any range with count == 1
      // Note: `behavior` is ignored in this case
      return .quantification(
        .exactly(.init(faking: range.lowerBound)), .eager, node)
    case (0, _): // 0..<n or ..<n (closed forms arrive as 0..<(n + 1))
      return .quantification(
        .upToN(.init(faking: range.upperBound - 1)), behavior.astKind, node)
    case (_, Int.max): // n...
      return .quantification(
        .nOrMore(.init(faking: range.lowerBound)), behavior.astKind, node)
    default: // any other range
      return .quantification(
        .range(
          .init(faking: range.lowerBound),
          .init(faking: range.upperBound - 1)),
        behavior.astKind,
        node)
    }
  }
}

extension QuantificationBehavior {
/// Match as much of the input string as possible, backtracking when
/// necessary.
public static var eagerly: QuantificationBehavior {
.init(kind: .eagerly)
}

/// Match as little of the input string as possible, expanding the matched
/// region as necessary to complete a match.
public static var reluctantly: QuantificationBehavior {
.init(kind: .reluctantly)
}

/// Match as much of the input string as possible, performing no backtracking.
public static var possessively: QuantificationBehavior {
.init(kind: .possessively)
Expand Down Expand Up @@ -247,22 +241,18 @@ public struct TryCapture<Output>: _BuiltinRegexComponent {

// MARK: - Backreference

/// An identifier whose `base` is taken from a type-wide counter, so each newly
/// created value receives the next integer in sequence.
struct ReferenceID: Hashable, Equatable {
  /// The next `base` value to hand out.
  private static var counter: Int = 0
  var base: Int

  /// Creates an identifier using the current counter value, then advances the
  /// counter by one.
  init() {
    let next = Self.counter
    Self.counter = next + 1
    base = next
  }
}

/// A regex component that stands for a symbolic reference, identified by a
/// `ReferenceID` created when the reference is initialized.
public struct Reference<Capture>: RegexComponent {
  let id: ReferenceID

  /// Creates a reference.
  ///
  /// - Parameter captureType: The capture type; not read by the initializer
  ///   body, it only fixes the `Capture` generic parameter.
  public init(_ captureType: Capture.Type = Capture.self) {
    id = ReferenceID()
  }

  /// A regex consisting of a symbolic-reference atom carrying this
  /// reference's identifier.
  public var regex: Regex<Capture> {
    return Regex<Capture>(node: .atom(.symbolicReference(id)))
  }
}

extension Regex.Match {
  /// Looks up the value associated with `reference` by forwarding the
  /// reference's identifier to the identifier-based subscript.
  public subscript<Capture>(_ reference: Reference<Capture>) -> Capture {
    return self[reference.id]
  }
}
28 changes: 28 additions & 0 deletions Sources/RegexBuilder/Match.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

import _StringProcessing

extension String {
  /// Matches this string against the component produced by a builder closure.
  ///
  /// - Parameter content: A `RegexComponentBuilder` closure producing the
  ///   component to match with.
  /// - Returns: The result of `match(_:)` for the built component.
  public func match<R: RegexComponent>(
    @RegexComponentBuilder _ content: () -> R
  ) -> Regex<R.Output>.Match? {
    let component = content()
    return match(component)
  }
}

extension Substring {
  /// Matches this substring against the component produced by a builder
  /// closure.
  ///
  /// - Parameter content: A `RegexComponentBuilder` closure producing the
  ///   component to match with.
  /// - Returns: The result of `match(_:)` for the built component.
  public func match<R: RegexComponent>(
    @RegexComponentBuilder _ content: () -> R
  ) -> Regex<R.Output>.Match? {
    let component = content()
    return match(component)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// BEGIN AUTO-GENERATED CONTENT

import _MatchingEngine
@_spi(RegexBuilder) import _StringProcessing

extension RegexComponentBuilder {
public static func buildPartialBlock<W0, W1, C0, R0: RegexComponent, R1: RegexComponent>(
Expand Down
3 changes: 2 additions & 1 deletion Sources/VariadicsGenerator/VariadicsGenerator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
//
//===----------------------------------------------------------------------===//

// swift run VariadicsGenerator --max-arity 10 > Sources/_StringProcessing/RegexDSL/Variadics.swift
// swift run VariadicsGenerator --max-arity 10 > Sources/RegexBuilder/Variadics.swift

import ArgumentParser
#if os(macOS)
Expand Down Expand Up @@ -121,6 +121,7 @@ struct VariadicsGenerator: ParsableCommand {
// BEGIN AUTO-GENERATED CONTENT

import _MatchingEngine
@_spi(RegexBuilder) import _StringProcessing


""")
Expand Down
11 changes: 11 additions & 0 deletions Sources/_StringProcessing/CharacterClass.swift
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,17 @@ public struct CharacterClass: Hashable {
}
}

extension CharacterClass: RegexComponent {
  public typealias Output = Substring

  /// A regex built from the AST returned by `makeAST()`.
  ///
  /// Traps with a fatal error when `makeAST()` returns `nil`.
  public var regex: Regex<Output> {
    if let ast = makeAST() {
      return Regex(ast: ast)
    }
    fatalError("FIXME: extended AST?")
  }
}

extension RegexComponent where Self == CharacterClass {
public static var any: CharacterClass {
.init(cc: .any, matchLevel: .graphemeCluster)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public struct Regex<Output>: RegexComponent {
init(ast: AST) {
self.tree = ast.dslTree
}

init(tree: DSLTree) {
self.tree = tree
}
Expand All @@ -44,7 +45,8 @@ public struct Regex<Output>: RegexComponent {
let program: Program
// var ast: AST { program.ast }

var root: DSLTree.Node {
@_spi(RegexBuilder)
public var root: DSLTree.Node {
program.tree.root
}

Expand All @@ -59,7 +61,8 @@ public struct Regex<Output>: RegexComponent {
self.program = Program(ast: .init(ast, globalOptions: nil))
}

init(node: DSLTree.Node) {
@_spi(RegexBuilder)
public init(node: DSLTree.Node) {
self.program = Program(tree: .init(node, options: nil))
}

Expand All @@ -84,17 +87,46 @@ public struct Regex<Output>: RegexComponent {
self = content.regex
}

public init<Content: RegexComponent>(
@RegexComponentBuilder _ content: () -> Content
) where Content.Output == Output {
self.init(content())
public var regex: Regex<Output> {
self
}
}

// MARK: - Primitive regex components

extension String: RegexComponent {
public typealias Output = Substring

public var regex: Regex<Output> {
self
.init(node: .quotedLiteral(self))
}
}

extension Substring: RegexComponent {
  public typealias Output = Substring

  /// A regex built from a quoted-literal node wrapping this substring's
  /// contents, copied into a `String`.
  public var regex: Regex<Output> {
    let literal = String(self)
    return Regex<Output>(node: .quotedLiteral(literal))
  }
}

extension Character: RegexComponent {
  public typealias Output = Substring

  /// A regex built from a `.char` atom node wrapping this character.
  public var regex: Regex<Output> {
    return Regex<Output>(node: .atom(.char(self)))
  }
}

extension UnicodeScalar: RegexComponent {
  public typealias Output = Substring

  /// A regex built from a `.scalar` atom node wrapping this Unicode scalar.
  public var regex: Regex<Output> {
    return Regex<Output>(node: .atom(.scalar(self)))
  }
}

// MARK: - Testing

public struct MockRegexLiteral<Output>: RegexComponent {
public typealias MatchValue = Substring
Expand Down
Loading