Skip to content

Allow setting any of the three quant behaviors #311

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 2 additions & 42 deletions Sources/RegexBuilder/DSL.swift
Original file line number Diff line number Diff line change
Expand Up @@ -94,40 +94,20 @@ extension UnicodeScalar: RegexComponent {

// Note: Quantifiers are currently gyb'd.

/// Describes how much of the input a quantifier tries to consume.
@available(SwiftStdlib 5.7, *)
public struct QuantificationBehavior {
  /// The behavior selected by this value.
  internal enum Kind {
    case eagerly, reluctantly, possessively
  }

  var kind: Kind

  /// The `DSLTree` quantification kind that corresponds to `kind`.
  internal var astKind: DSLTree._AST.QuantificationKind {
    let mapped: DSLTree._AST.QuantificationKind
    switch kind {
    case .eagerly:
      mapped = .eager
    case .reluctantly:
      mapped = .reluctant
    case .possessively:
      mapped = .possessive
    }
    return mapped
  }
}

extension DSLTree.Node {
/// Generates a DSLTree node for a repeated range of the given DSLTree node.
/// Individual public API functions are in the generated Variadics.swift file.
@available(SwiftStdlib 5.7, *)
static func repeating(
_ range: Range<Int>,
_ behavior: QuantificationBehavior?,
_ behavior: RegexRepetitionBehavior?,
_ node: DSLTree.Node
) -> DSLTree.Node {
// TODO: Throw these as errors
assert(range.lowerBound >= 0, "Cannot specify a negative lower bound")
assert(!range.isEmpty, "Cannot specify an empty range")

let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default

switch (range.lowerBound, range.upperBound) {
case (0, Int.max): // 0...
Expand All @@ -147,26 +127,6 @@ extension DSLTree.Node {
}
}

@available(SwiftStdlib 5.7, *)
extension QuantificationBehavior {
  /// Consume as much of the input string as possible, backtracking when
  /// that is needed for the overall match.
  public static var eagerly: QuantificationBehavior {
    QuantificationBehavior(kind: .eagerly)
  }

  /// Consume as little of the input string as possible, growing the matched
  /// region only as needed to complete a match.
  public static var reluctantly: QuantificationBehavior {
    QuantificationBehavior(kind: .reluctantly)
  }

  /// Consume as much of the input string as possible and never backtrack.
  public static var possessively: QuantificationBehavior {
    QuantificationBehavior(kind: .possessively)
  }
}

@available(SwiftStdlib 5.7, *)
public struct OneOrMore<Output>: _BuiltinRegexComponent {
public var regex: Regex<Output>
Expand Down
308 changes: 154 additions & 154 deletions Sources/RegexBuilder/Variadics.swift

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions Sources/VariadicsGenerator/VariadicsGenerator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -377,9 +377,9 @@ struct VariadicsGenerator: ParsableCommand {
\(params.disfavored)\
public init<\(params.genericParams)>(
_ component: Component,
_ behavior: QuantificationBehavior? = nil
_ behavior: RegexRepetitionBehavior? = nil
) \(params.whereClauseForInit) {
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default
self.init(node: .quantification(.\(kind.astQuantifierAmount), kind, component.regex.root))
}
}
Expand All @@ -389,10 +389,10 @@ struct VariadicsGenerator: ParsableCommand {
\(defaultAvailableAttr)
\(params.disfavored)\
public init<\(params.genericParams)>(
_ behavior: QuantificationBehavior? = nil,
_ behavior: RegexRepetitionBehavior? = nil,
@\(concatBuilderName) _ component: () -> Component
) \(params.whereClauseForInit) {
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default
self.init(node: .quantification(.\(kind.astQuantifierAmount), kind, component().regex.root))
}
}
Expand Down Expand Up @@ -508,7 +508,7 @@ struct VariadicsGenerator: ParsableCommand {
public init<\(params.genericParams), R: RangeExpression>(
_ component: Component,
_ expression: R,
_ behavior: QuantificationBehavior? = nil
_ behavior: RegexRepetitionBehavior? = nil
) \(params.repeatingWhereClause) {
self.init(node: .repeating(expression.relative(to: 0..<Int.max), behavior, component.regex.root))
}
Expand Down
4 changes: 4 additions & 0 deletions Sources/_RegexParser/Regex/AST/MatchingOptions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ extension AST {
case graphemeClusterSemantics // X
case unicodeScalarSemantics // u
case byteSemantics // b

// Swift-only default possessive quantifier
case possessiveByDefault // t.b.d.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, it's fine to model Swift extensions (even ones that we haven't assigned letters to) in the options here. But I'm curious whether, or why, it's necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just more of our existing AST leakage. I think it's okay since we'll want it in the AST eventually, but we still do need to audit and remove AST values from non-AST types.

}

public var kind: Kind
public var location: SourceLocation

Expand Down
4 changes: 1 addition & 3 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,7 @@ extension Compiler.ByteCodeGen {
case .syntax(let kind):
updatedKind = kind.ast.applying(options)
case .default:
updatedKind = options.isReluctantByDefault
? .reluctant
: .eager
updatedKind = options.defaultQuantificationKind
}

let (low, high) = amount.bounds
Expand Down
34 changes: 33 additions & 1 deletion Sources/_StringProcessing/MatchingOptions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ struct MatchingOptions {
// Must contain exactly one of each mutually exclusive group
assert(stack.last!.intersection(.textSegmentOptions).rawValue.nonzeroBitCount == 1)
assert(stack.last!.intersection(.semanticMatchingLevels).rawValue.nonzeroBitCount == 1)

// Must contain at most one quantifier behavior
assert(stack.last!.intersection(.repetitionBehaviors).rawValue.nonzeroBitCount <= 1)
}
}

Expand Down Expand Up @@ -63,6 +66,16 @@ extension MatchingOptions {
stack.last!.contains(.reluctantByDefault)
}

/// The quantification kind used when a quantifier does not specify one,
/// derived from the option set on top of the stack.
var defaultQuantificationKind: AST.Quantification.Kind {
  let current = stack.last!
  // Possessive is checked first, then reluctant; eager is the fallback.
  if current.contains(.possessiveByDefault) {
    return .possessive
  }
  return current.contains(.reluctantByDefault) ? .reluctant : .eager
}

var dotMatchesNewline: Bool {
stack.last!.contains(.singleLine)
}
Expand Down Expand Up @@ -150,6 +163,9 @@ extension MatchingOptions {
case unicodeScalarSemantics
case byteSemantics

// Swift-only default possessive quantifier
case possessiveByDefault
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be modeled in this way? It seems like options would just have a quantification kind in it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bitset internally, so it's modeling the tripartite state with two Booleans and an invariant check.


init?(_ astKind: AST.MatchingOption.Kind) {
switch astKind {
case .caseInsensitive:
Expand Down Expand Up @@ -184,6 +200,8 @@ extension MatchingOptions {
self = .unicodeScalarSemantics
case .byteSemantics:
self = .byteSemantics
case .possessiveByDefault:
self = .possessiveByDefault

// Whitespace options are only relevant during parsing, not compilation.
case .extended, .extraExtended:
Expand Down Expand Up @@ -219,6 +237,9 @@ extension MatchingOptions {
if Self.textSegmentOptions.contains(opt.representation) {
remove(.textSegmentOptions)
}
if Self.repetitionBehaviors.contains(opt.representation) {
remove(.repetitionBehaviors)
}

insert(opt.representation)
}
Expand All @@ -241,6 +262,9 @@ extension MatchingOptions {
guard let opt = Option(opt.kind) else {
continue
}
if Self.repetitionBehaviors.contains(opt.representation) {
remove(.repetitionBehaviors)
}
remove(opt.representation)
}
}
Expand Down Expand Up @@ -274,7 +298,15 @@ extension MatchingOptions.Representation {
static var semanticMatchingLevels: Self {
[.graphemeClusterSemantics, .unicodeScalarSemantics, .byteSemantics]
}


// Quantification behavior options
/// Option set containing only the reluctant-by-default flag.
static var reluctantByDefault: Self { .init(.reluctantByDefault) }
/// Option set containing only the possessive-by-default flag.
static var possessiveByDefault: Self { .init(.possessiveByDefault) }

/// The group of options that select a default quantifier behavior:
/// `reluctantByDefault` and `possessiveByDefault`.
static var repetitionBehaviors: Self {
[.reluctantByDefault, .possessiveByDefault]
}

/// The default set of options.
static var `default`: Self {
[.graphemeClusterSemantics, .textSegmentGraphemeMode]
Expand Down
66 changes: 56 additions & 10 deletions Sources/_StringProcessing/Regex/Options.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ extension RegexComponent {
/// Returns a regular expression where the start and end of input
/// anchors (`^` and `$`) also match against the start and end of a line.
///
/// This method corresponds to applying the `m` option in a regular
/// expression literal. For this behavior in the `RegexBuilder` syntax, see
/// This method corresponds to applying the `m` option in regex syntax. For
/// this behavior in the `RegexBuilder` syntax, see
/// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfInput``,
/// and ``Anchor.endOfInput``.
///
Expand All @@ -69,16 +69,22 @@ extension RegexComponent {
wrapInOption(.multiline, addingIf: matchLineEndings)
}

/// Returns a regular expression where quantifiers are reluctant by default
/// instead of eager.
/// Returns a regular expression where quantifiers use the specified behavior
/// by default.
///
/// This method corresponds to applying the `U` option in a regular
/// expression literal.
/// This setting does not affect calls to quantifier methods, such as
/// `OneOrMore`, that include an explicit `behavior` parameter.
///
/// - Parameter useReluctantQuantifiers: A Boolean value indicating whether
/// quantifiers should be reluctant by default.
public func reluctantQuantifiers(_ useReluctantQuantifiers: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.reluctantByDefault, addingIf: useReluctantQuantifiers)
/// Passing `.eager` or `.reluctant` to this method corresponds to applying
/// the `(?-U)` or `(?U)` option in regex syntax, respectively.
///
/// - Parameter behavior: The default behavior to use for quantifiers.
public func repetitionBehavior(_ behavior: RegexRepetitionBehavior) -> Regex<RegexOutput> {
  // Possessive has its own option and is always requested as "adding".
  guard behavior != .possessive else {
    return wrapInOption(.possessiveByDefault, addingIf: true)
  }

  // Eager and reluctant are both expressed through `.reluctantByDefault`:
  // `.reluctant` passes `true` for `addingIf`, `.eager` passes `false`.
  let isReluctant = behavior == .reluctant
  return wrapInOption(.reluctantByDefault, addingIf: isReluctant)
}

/// Returns a regular expression that matches with the specified semantic
Expand Down Expand Up @@ -183,6 +189,46 @@ public struct RegexWordBoundaryKind: Hashable {
}
}

/// Describes how much of the input a quantifier tries to consume.
@available(SwiftStdlib 5.7, *)
public struct RegexRepetitionBehavior: Hashable {
  /// The behavior selected by this value.
  internal enum Kind {
    case eager, reluctant, possessive
  }

  var kind: Kind

  /// The `DSLTree` quantification kind that corresponds to `kind`.
  ///
  /// Public only as SPI for `RegexBuilder`.
  @_spi(RegexBuilder) public var dslTreeKind: DSLTree._AST.QuantificationKind {
    switch kind {
    case .eager:
      return .eager
    case .reluctant:
      return .reluctant
    case .possessive:
      return .possessive
    }
  }
}

@available(SwiftStdlib 5.7, *)
extension RegexRepetitionBehavior {
  /// Consume as much of the input string as possible, backtracking when
  /// that is needed for the overall match.
  public static var eager: Self {
    Self(kind: .eager)
  }

  /// Consume as little of the input string as possible, growing the matched
  /// region only as needed to complete a match.
  public static var reluctant: Self {
    Self(kind: .reluctant)
  }

  /// Consume as much of the input string as possible and never backtrack.
  public static var possessive: Self {
    Self(kind: .possessive)
  }
}

// MARK: - Helper method

@available(SwiftStdlib 5.7, *)
Expand Down
Loading