Skip to content

Some AST construction and DSL refactoring #60

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 32 additions & 15 deletions Sources/VariadicsGenerator/VariadicsGenerator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,23 @@ func output(_ content: String) {
}

func outputForEach<C: Collection>(
_ elements: C, separator: String, _ content: (C.Element) -> String
_ elements: C,
separator: String,
lineTerminator: String? = nil,
_ content: (C.Element) -> String
) {
for i in elements.indices {
output(content(elements[i]))
if elements.index(after: i) != elements.endIndex {
let needsSep = elements.index(after: i) != elements.endIndex
if needsSep {
output(separator)
}
if let lt = lineTerminator {
let indent = needsSep ? " " : " "
output("\(lt)\n\(indent)")
} else if needsSep {
output(" ")
}
}
}

Expand Down Expand Up @@ -97,7 +107,12 @@ struct VariadicsGenerator: ParsableCommand {
precondition(maxArity > 1)
precondition(maxArity < Counter.bitWidth)

output("// BEGIN AUTO-GENERATED CONTENT\n\n\n")
output("""
// BEGIN AUTO-GENERATED CONTENT

import _MatchingEngine

""")

for arity in minArity...maxArity {
for permutation in Permutations(arity: arity) {
Expand All @@ -118,12 +133,12 @@ struct VariadicsGenerator: ParsableCommand {
// public init(...) { ... }
// }
let typeName = "\(concatenationStructTypeBaseName)\(arity)_\(permutation.identifier)"
output("public struct \(typeName)<")
outputForEach(0..<arity, separator: ", ") { "T\($0): \(patternProtocolName)" }
output(">: \(patternProtocolName)")
output("public struct \(typeName)<\n ")
outputForEach(0..<arity, separator: ",") { "T\($0): \(patternProtocolName)" }
output("\n>: \(patternProtocolName)")
if permutation.hasCaptureless {
output(" where ")
outputForEach(permutation.capturelessIndices, separator: ", ") {
outputForEach(permutation.capturelessIndices, separator: ",") {
"T\($0).\(captureAssociatedTypeName): \(emptyProtocolName)"
}
}
Expand All @@ -140,26 +155,28 @@ struct VariadicsGenerator: ParsableCommand {
output("\n")
output(" public let \(patternProtocolRequirementName): \(PatternTypeBaseName)<\(captureAssociatedTypeName)>\n")
output(" init(")
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
outputForEach(0..<arity, separator: ",") { "_ x\($0): T\($0)" }
output(") {\n")
output(" \(patternProtocolRequirementName) = .init(ast: .concatenation([")
outputForEach(0..<arity, separator: ", ") { i in
output(" \(patternProtocolRequirementName) = .init(ast: concat(\n ")
outputForEach(
0..<arity, separator: ",", lineTerminator: ""
) { i in
"x\(i).\(patternProtocolRequirementName).ast"
}
output("]))\n")
output("))\n")
output(" }\n}\n\n")

// Emit concatenation builders.
output("extension \(patternBuilderTypeName) {\n")
output(" public static func buildBlock<")
outputForEach(0..<arity, separator: ", ") { "T\($0)" }
outputForEach(0..<arity, separator: ",") { "T\($0)" }
output(">(\n ")
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
outputForEach(0..<arity, separator: ",") { "_ x\($0): T\($0)" }
output("\n ) -> \(typeName)<")
outputForEach(0..<arity, separator: ", ") { "T\($0)" }
outputForEach(0..<arity, separator: ",") { "T\($0)" }
output("> {\n")
output(" \(typeName)(")
outputForEach(0..<arity, separator: ", ") { "x\($0)" }
outputForEach(0..<arity, separator: ",") { "x\($0)" }
output(")\n }\n}\n\n")
}
}
11 changes: 6 additions & 5 deletions Sources/_MatchingEngine/Regex/AST/AST.swift
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/// A regex abstract syntax tree
public enum AST: ASTValue, ASTAction {
public enum AST: ASTValue/*, ASTAction*/ {
public typealias Product = Self

/// ... | ... | ...
Expand All @@ -11,10 +11,6 @@ public enum AST: ASTValue, ASTAction {
/// (...)
indirect case group(Group, AST)

/// Group with a registered transform
indirect case groupTransform(
Group, AST, transform: CaptureTransform)

indirect case quantification(Quantifier, AST)

case quote(String)
Expand All @@ -26,6 +22,11 @@ public enum AST: ASTValue, ASTAction {
case customCharacterClass(CustomCharacterClass)

case empty


// FIXME: Move off the regex literal AST
indirect case groupTransform(
Group, AST, transform: CaptureTransform)
}

extension AST {
Expand Down
41 changes: 22 additions & 19 deletions Sources/_MatchingEngine/Regex/AST/ASTAction.swift
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
public protocol ASTAction {
// TODO: associated types, or just make them produce AST nodes?
// TODO: This might be interesting in the future, but for now
// we make trees

// MARK: Group
static func capture(_ a: AST, _ sr: SourceRange?) -> AST

static func nonCapture(_ a: AST, _ sr: SourceRange?) -> AST

// MARK: Quantification

static func zeroOrMore(
_ kind: Quantifier.Kind, _ a: AST, _ r: SourceRange?
) -> AST
static func oneOrMore(
_ kind: Quantifier.Kind, _ a: AST, _ r: SourceRange?
) -> AST
static func zeroOrOne(
_ kind: Quantifier.Kind, _ a: AST, _ r: SourceRange?
) -> AST
}
//public protocol ASTAction {
// // TODO: associated types, or just make them produce AST nodes?
//
// // MARK: Group
// static func capture(_ a: AST, _ sr: SourceRange?) -> AST
//
// static func nonCapture(_ a: AST, _ sr: SourceRange?) -> AST
//
// // MARK: Quantification
//
// static func zeroOrMore(
// _ kind: Quantifier.Kind, _ a: AST, _ r: SourceRange?
// ) -> AST
// static func oneOrMore(
// _ kind: Quantifier.Kind, _ a: AST, _ r: SourceRange?
// ) -> AST
// static func zeroOrOne(
// _ kind: Quantifier.Kind, _ a: AST, _ r: SourceRange?
// ) -> AST
//}
156 changes: 156 additions & 0 deletions Sources/_MatchingEngine/Regex/AST/ASTBuilder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*

These functions are temporary AST construction helpers. As
the AST gets more and more source-location tracking, we'll
want easier migration paths for our parser tests (which
construct and compare location-less AST nodes) as well as the
result builder DSL (which has a different notion of location).

Without real namespaces and `using`, attempts at
pseudo-namespaces tie the use site to being nested inside a
type. So for now, these are global, but they will likely be
namespaced in the future if/when clients are weaned off the
AST.

*/

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For namespacing, could you move these top-level symbols to an enum, e.g. ASTBuilder, as static methods?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe eventually. I'm inclined not to for now because otherwise you end up either with a bunch of ASTBuilder.concat(ASTBuilder.atom....) in unit tests, or every client just reinvents these. Since the contextual type requested is AST and not ASTBuilder, they wouldn't be seen for lookup unless everything was typed as a builder instead of an AST. These don't work well hosted on AST because names can collide with case names.

Longer term, I don't think we'll be as enthusiastically creating AST nodes for every need. This is public in the "core" module right now (so visible to _StringProcessing but not its clients). Another option is to make these internal inside _StringProcessing and the test use case can use @testable anyways.

Also, we'll see if AST even stays as an enum or in its current form. We might need to overhaul it more for options tracking.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Since the contextual type requested is AST and not ASTBuilder, they wouldn't be seen for lookup unless everything was typed as a builder instead of an AST.

Hmm, these could also be static methods on AST itself though. I'm still concerned about polluting the global namespace.

> Another option is to make these internal inside _StringProcessing and the test use case can use @testable anyways.

I think that this approach works better; we only build location-less ASTs in _StringProcessing module and its tests.

/// Placeholder source location: the start index of an empty string.
public let _fakeLoc = "".startIndex
/// Placeholder empty range running from `_fakeLoc` to `_fakeLoc`; the
/// `group` and `quant` helpers in this file pass it where a range is needed.
public let _fakeRange = _fakeLoc ..< _fakeLoc

/// Builds an `.alternation` node whose children are `asts`.
public func alt(_ asts: [AST]) -> AST {
  return AST.alternation(asts)
}
/// Variadic convenience: builds an `.alternation` node from the listed
/// children.
public func alt(_ asts: AST...) -> AST {
  // Inside the body `asts` is an `[AST]`, so build the node directly.
  return AST.alternation(asts)
}

/// Builds a `.concatenation` node whose children are `asts`.
public func concat(_ asts: [AST]) -> AST {
  return AST.concatenation(asts)
}
/// Variadic convenience: builds a `.concatenation` node from the listed
/// children.
public func concat(_ asts: AST...) -> AST {
  // Inside the body `asts` is an `[AST]`, so build the node directly.
  return AST.concatenation(asts)
}

/// Wraps `child` in a `.group` node of the given `kind`.
///
/// The `Group` is constructed with `_fakeRange` as its range.
public func group(_ kind: Group.Kind, _ child: AST) -> AST {
  let wrapper = Group(kind, _fakeRange)
  return .group(wrapper, child)
}
/// Wraps `child` in a group of kind `.capture`.
public func capture(_ child: AST) -> AST {
  return group(.capture, child)
}
/// Wraps `child` in a group of kind `.nonCapture`.
public func nonCapture(_ child: AST) -> AST {
  return group(.nonCapture, child)
}
/// Wraps `child` in a group of kind `.namedCapture(name)`.
public func namedCapture(_ name: String, _ child: AST) -> AST {
  return group(.namedCapture(name), child)
}
/// Wraps `child` in a group of kind `.nonCaptureReset`.
public func nonCaptureReset(_ child: AST) -> AST {
  return group(.nonCaptureReset, child)
}
/// Wraps `child` in a group of kind `.atomicNonCapturing`.
public func atomicNonCapturing(_ child: AST) -> AST {
  return group(.atomicNonCapturing, child)
}
/// Wraps `child` in a group of kind `.lookahead`.
public func lookahead(
  _ child: AST
) -> AST {
  return group(.lookahead, child)
}
/// Wraps `child` in a group of kind `.lookbehind`.
public func lookbehind(
  _ child: AST
) -> AST {
  return group(.lookbehind, child)
}
/// Wraps `child` in a group of kind `.negativeLookahead`.
public func negativeLookahead(
  _ child: AST
) -> AST {
  return group(.negativeLookahead, child)
}
/// Wraps `child` in a group of kind `.negativeLookbehind`.
public func negativeLookbehind(
  _ child: AST
) -> AST {
  return group(.negativeLookbehind, child)
}

/// An `.atom` node holding the `.any` atom.
public var any: AST {
  return .atom(.any)
}

/// Wraps `child` in a `.quantification` node.
///
/// - Parameters:
///   - amount: The quantifier amount.
///   - kind: The quantifier kind; defaults to `.greedy`.
///   - child: The node being quantified.
///
/// The `Quantifier` is constructed with `_fakeRange` as its range.
public func quant(
  _ amount: Quantifier.Amount,
  _ kind: Quantifier.Kind = .greedy,
  _ child: AST
) -> AST {
  let quantifier = Quantifier(amount, kind, _fakeRange)
  return .quantification(quantifier, child)
}
/// Quantifies `child` with amount `.zeroOrMore`; `kind` defaults to
/// `.greedy`.
public func zeroOrMore(_ kind: Quantifier.Kind = .greedy, _ child: AST) -> AST {
  return quant(.zeroOrMore, kind, child)
}
/// Quantifies `child` with amount `.zeroOrOne`; `kind` defaults to
/// `.greedy`.
public func zeroOrOne(_ kind: Quantifier.Kind = .greedy, _ child: AST) -> AST {
  return quant(.zeroOrOne, kind, child)
}
/// Quantifies `child` with amount `.oneOrMore`; `kind` defaults to
/// `.greedy`.
public func oneOrMore(_ kind: Quantifier.Kind = .greedy, _ child: AST) -> AST {
  return quant(.oneOrMore, kind, child)
}
/// Quantifies `child` with amount `.exactly(i)`; `kind` defaults to
/// `.greedy`.
public func exactly(_ kind: Quantifier.Kind = .greedy, _ i: Int, child: AST) -> AST {
  let amount: Quantifier.Amount = .exactly(i)
  return quant(amount, kind, child)
}
/// Quantifies `child` with amount `.nOrMore(i)`; `kind` defaults to
/// `.greedy`.
public func nOrMore(_ kind: Quantifier.Kind = .greedy, _ i: Int, child: AST) -> AST {
  let amount: Quantifier.Amount = .nOrMore(i)
  return quant(amount, kind, child)
}
/// Quantifies `child` with amount `.upToN(i)`; `kind` defaults to
/// `.greedy`.
public func upToN(_ kind: Quantifier.Kind = .greedy, _ i: Int, child: AST) -> AST {
  let amount: Quantifier.Amount = .upToN(i)
  return quant(amount, kind, child)
}
/// Quantifies `child` with amount `.range(r)`; `kind` defaults to
/// `.greedy`.
public func quantRange(
  _ kind: Quantifier.Kind = .greedy, _ r: ClosedRange<Int>, child: AST
) -> AST {
  let amount: Quantifier.Amount = .range(r)
  return quant(amount, kind, child)
}

/// Builds a `.customCharacterClass` AST node from `members`.
///
/// - Parameters:
///   - members: The members of the custom character class.
///   - inverted: When `true` the class is created as `.inverted`,
///     otherwise as `.normal`. Defaults to `false`.
public func charClass(
  _ members: CustomCharacterClass.Member..., inverted: Bool = false
) -> AST {
  .customCharacterClass(
    CustomCharacterClass(inverted ? .inverted : .normal, members)
  )
}
/// Builds a `.custom` character-class member from `members`, for use as a
/// member of another custom character class.
///
/// - Parameters:
///   - members: The members of the nested custom character class.
///   - inverted: When `true` the class is created as `.inverted`,
///     otherwise as `.normal`. Defaults to `false`.
public func charClass(
  _ members: CustomCharacterClass.Member..., inverted: Bool = false
) -> CustomCharacterClass.Member {
  .custom(
    CustomCharacterClass(inverted ? .inverted : .normal, members)
  )
}
/// Builds a `.named` atom for the given POSIX character set.
///
/// - Parameters:
///   - set: The POSIX character set.
///   - inverted: Passed through as the `inverted` flag; defaults to `false`.
public func posixSet(
  _ set: Unicode.POSIXCharacterSet,
  inverted: Bool = false
) -> Atom {
  .named(.init(inverted: inverted, set: set))
}
2 changes: 1 addition & 1 deletion Sources/_MatchingEngine/Regex/AST/ASTEntity.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public protocol ASTValue: _ASTPrintable, Hashable {
///
/// Tracks source location information
public protocol ASTEntity: ASTValue {
var sourceRange: SourceRange? { get }
var sourceRange: SourceRange { get }
}

public protocol ASTParentEntity: ASTEntity, _ASTPrintableNested {
Expand Down
Loading