Skip to content

Flatten optional nesting for regex literal captures #545

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 68 additions & 25 deletions Sources/_RegexParser/Regex/Parse/CaptureList.swift
Original file line number Diff line number Diff line change
Expand Up @@ -57,63 +57,105 @@ extension CaptureList {
}
}

extension CaptureList {
  /// Accumulates captures into a `CaptureList`, exposed via `captures`.
  public struct Builder {
    /// The captures gathered so far.
    public var captures = CaptureList()

    public init() {}

    /// Tracks how many layers of optionality apply to a capture.
    ///
    /// Two depths are maintained, an outer and an inner one. These allow
    /// e.g. the nesting of a regex literal in a DSL: outside the scope of the
    /// literal, nesting is allowed, but inside the literal at most one extra
    /// layer of optionality may be added.
    public struct OptionalNesting {
      /// Depth recorded at the point nesting was disabled (0 if it never was).
      public var outerDepth: Int

      /// Whether each added optional stacks on top of the previous ones.
      public var canNest: Bool

      /// Optional layers added within the current scope.
      public var innerDepth: Int

      /// Creates a nesting that starts with no inner optional layers.
      internal init(outerDepth: Int, canNest: Bool) {
        self.innerDepth = 0
        self.canNest = canNest
        self.outerDepth = outerDepth
      }

      /// Creates a nesting with both depths at zero.
      public init(canNest: Bool) {
        self.init(outerDepth: 0, canNest: canNest)
      }

      /// The total optional depth: outer plus inner.
      public var depth: Int {
        return outerDepth + innerDepth
      }

      /// A nesting for an inner scope in which optionals no longer stack.
      ///
      /// When nesting is currently allowed, the current total depth becomes
      /// the outer depth of the result. When it is already disabled, the
      /// receiver is returned unchanged.
      public var disablingNesting: Self {
        if canNest {
          return Self(outerDepth: depth, canNest: false)
        }
        return self
      }

      /// A copy of this nesting with one more optional applied.
      ///
      /// With nesting enabled the inner depth grows by one; with nesting
      /// disabled the inner depth is pinned to exactly one, however many
      /// times this is applied.
      public var addingOptional: Self {
        var deeper = self
        if canNest {
          deeper.innerDepth += 1
        } else {
          deeper.innerDepth = 1
        }
        return deeper
      }
    }
  }
}

// MARK: Generating from AST

extension AST.Node {
public func _addCaptures(
to list: inout CaptureList,
optionalNesting nesting: Int
extension CaptureList.Builder {
public mutating func addCaptures(
of node: AST.Node, optionalNesting nesting: OptionalNesting
) {
let addOptional = nesting+1
switch self {
switch node {
case let .alternation(a):
for child in a.children {
child._addCaptures(to: &list, optionalNesting: addOptional)
addCaptures(of: child, optionalNesting: nesting.addingOptional)
}

case let .concatenation(c):
for child in c.children {
child._addCaptures(to: &list, optionalNesting: nesting)
addCaptures(of: child, optionalNesting: nesting)
}

case let .group(g):
switch g.kind.value {
case .capture:
list.append(.init(optionalDepth: nesting, g.location))
captures.append(.init(optionalDepth: nesting.depth, g.location))

case .namedCapture(let name):
list.append(.init(name: name.value, optionalDepth: nesting, g.location))
captures.append(.init(
name: name.value, optionalDepth: nesting.depth, g.location))

case .balancedCapture(let b):
list.append(.init(name: b.name?.value, optionalDepth: nesting,
g.location))
captures.append(.init(
name: b.name?.value, optionalDepth: nesting.depth, g.location))

default: break
}
g.child._addCaptures(to: &list, optionalNesting: nesting)
addCaptures(of: g.child, optionalNesting: nesting)

case .conditional(let c):
switch c.condition.kind {
case .group(let g):
AST.Node.group(g)._addCaptures(to: &list, optionalNesting: nesting)
addCaptures(of: .group(g), optionalNesting: nesting)
default:
break
}

c.trueBranch._addCaptures(to: &list, optionalNesting: addOptional)
c.falseBranch._addCaptures(to: &list, optionalNesting: addOptional)
addCaptures(of: c.trueBranch, optionalNesting: nesting.addingOptional)
addCaptures(of: c.falseBranch, optionalNesting: nesting.addingOptional)

case .quantification(let q):
var optNesting = nesting
if q.amount.value.bounds.atLeast == 0 {
optNesting += 1
optNesting = optNesting.addingOptional
}
q.child._addCaptures(to: &list, optionalNesting: optNesting)
addCaptures(of: q.child, optionalNesting: optNesting)

case .absentFunction(let abs):
switch abs.kind {
case .expression(_, _, let child):
child._addCaptures(to: &list, optionalNesting: nesting)
addCaptures(of: child, optionalNesting: nesting)
case .clearer, .repeater, .stopper:
break
}
Expand All @@ -122,16 +164,17 @@ extension AST.Node {
break
}
}
/// Builds the capture list for `ast`.
///
/// The list always begins with a depth-0 entry at a fake location
/// (presumably the whole-match capture), followed by the captures found
/// while walking `ast.root` with optional nesting disabled.
public static func build(_ ast: AST) -> CaptureList {
  var listBuilder = Self()

  // Seed the list before visiting any node of the tree.
  listBuilder.captures.append(.init(optionalDepth: 0, .fake))

  let topLevelNesting = OptionalNesting(canNest: false)
  listBuilder.addCaptures(of: ast.root, optionalNesting: topLevelNesting)

  return listBuilder.captures
}
}

extension AST {
/// The capture list (including the whole match) of this AST.
public var captureList: CaptureList {
var caps = CaptureList()
caps.append(.init(optionalDepth: 0, .fake))
root._addCaptures(to: &caps, optionalNesting: 0)
return caps
}
public var captureList: CaptureList { .Builder.build(self) }
}

// MARK: Convenience for testing and inspection
Expand Down
22 changes: 8 additions & 14 deletions Sources/_RegexParser/Regex/Parse/CaptureStructure.swift
Original file line number Diff line number Diff line change
Expand Up @@ -225,33 +225,27 @@ extension CaptureStructure: CustomStringConvertible {
extension AST {
/// The capture structure of this AST for compiler communication.
var captureStructure: CaptureStructure {
captureList._captureStructure(nestOptionals: true)
captureList._captureStructure
}
}

// MARK: Convert CaptureList into CaptureStructure

extension CaptureList {
func _captureStructure(nestOptionals: Bool) -> CaptureStructure {
var _captureStructure: CaptureStructure {
if captures.isEmpty { return .empty }
if captures.count == 1 {
return captures.first!._captureStructure(nestOptionals: nestOptionals)
return captures.first!._captureStructure
}
return .tuple(captures.map {
$0._captureStructure(nestOptionals: nestOptionals)
})
return .tuple(captures.map(\._captureStructure))
}
}

extension CaptureList.Capture {
func _captureStructure(nestOptionals: Bool) -> CaptureStructure {
if optionalDepth == 0 {
return .atom(name: name, type: type == Substring.self ? nil : .init(type))
}
var copy = self
copy.optionalDepth = 0
var base = copy._captureStructure(nestOptionals: false)
for _ in 0..<(nestOptionals ? optionalDepth : 1) {
var _captureStructure: CaptureStructure {
var base = CaptureStructure.atom(
name: name, type: type == Substring.self ? nil : .init(type))
for _ in 0 ..< optionalDepth {
base = .optional(base)
}
return base
Expand Down
3 changes: 0 additions & 3 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -812,9 +812,6 @@ fileprivate extension Compiler.ByteCodeGen {
}
}

case let .regexLiteral(l):
return try emitNode(l.ast.dslTreeNode)

case let .convertedRegexLiteral(n, _):
return try emitNode(n)

Expand Down
27 changes: 14 additions & 13 deletions Sources/_StringProcessing/Capture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,24 @@ func constructExistentialOutputComponent(
component: (range: Range<String.Index>, value: Any?)?,
optionalCount: Int
) -> Any {
let someCount: Int
var underlying: Any
if let component = component {
underlying = component.value ?? input[component.range]
someCount = optionalCount
var underlying = component.value ?? input[component.range]
for _ in 0 ..< optionalCount {
func wrap<T>(_ x: T) {
underlying = Optional(x) as Any
}
_openExistential(underlying, do: wrap)
}
return underlying
} else {
// Ok since we Any-box every step up the ladder
underlying = Optional<Any>(nil) as Any
someCount = optionalCount - 1
}
for _ in 0..<someCount {
func wrap<T>(_ x: T) {
underlying = Optional(x) as Any
precondition(optionalCount > 0, "Must have optional type")
func makeNil<T>(_ x: T.Type) -> Any {
T?.none as Any
}
_openExistential(underlying, do: wrap)
let underlyingTy = TypeConstruction.optionalType(
of: Substring.self, depth: optionalCount - 1)
return _openExistential(underlyingTy, do: makeNil)
}
return underlying
}

@available(SwiftStdlib 5.7, *)
Expand Down
4 changes: 0 additions & 4 deletions Sources/_StringProcessing/ConsumerInterface.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ extension DSLTree.Node {
// TODO: Should we handle this here?
return nil

case .regexLiteral:
fatalError(
"unreachable: We should only ask atoms")

case let .convertedRegexLiteral(n, _):
return try n.generateConsumer(opts)

Expand Down
3 changes: 0 additions & 3 deletions Sources/_StringProcessing/PrintAsPattern.swift
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,6 @@ extension PrettyPrinter {
case let .quotedLiteral(v):
print(v._quoted)

case .regexLiteral:
printBackoff(node)

case let .convertedRegexLiteral(n, _):
// FIXME: This recursion coordinates with back-off
// check above, so it should work out. Need a
Expand Down
Loading