Skip to content

Track source locations through the AST #57

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 110 additions & 70 deletions Sources/_MatchingEngine/Regex/AST/AST.swift
Original file line number Diff line number Diff line change
@@ -1,118 +1,158 @@
/// A regex abstract syntax tree
public enum AST: ASTValue/*, ASTAction*/ {
public typealias Product = Self
public indirect enum AST:
Hashable/*, _ASTPrintable ASTValue, ASTAction*/
{
/// Located value: a value wrapped with a source range
public typealias Loc = Source.Value

/// ... | ... | ...
indirect case alternation([AST])
case alternation(Alternation)

/// ... ...
indirect case concatenation([AST])
case concatenation(Concatenation)

/// (...)
indirect case group(Group, AST)
case group(Group)

indirect case quantification(Quantifier, AST)
case quantification(Quantification)

case quote(String)
/// \Q...\E
case quote(Quote)

case trivia // TODO: track comments
/// Non-semantic content, like comments
case trivia(Trivia) // TODO: track comments

case atom(Atom)

case customCharacterClass(CustomCharacterClass)

case empty
case empty(Empty)


// FIXME: Move off the regex literal AST
indirect case groupTransform(
Group, AST, transform: CaptureTransform)
case groupTransform(
Group, transform: CaptureTransform)
}

extension AST {
/// Shorthand for the atom node `.atom(.any)`.
public static var any: AST {
.atom(.any)
}
}

// Note that we're not yet an ASTEntity, would need to be a struct.
// We might end up with ASTStorage which projects the nice AST type.
// Values and projected entities can still refer to positions.
// ASTStorage might end up becoming the ASTAction conformer
// TODO: This is currently unused, but it's likely we'll want
// to host things like global options, more source info, etc.
// File-private pairing of an AST with an optional source range.
private struct ASTStorage {
// The wrapped syntax tree.
let ast: AST
// Source range stored alongside `ast`; optional, so it may be absent.
let sourceRange: SourceRange?
}

extension AST {
public var isSemantic: Bool {
// :-(
var _associatedValue: _ASTNode {
switch self {
case .trivia: return false
default: return true
case let .alternation(v): return v
case let .concatenation(v): return v
case let .group(v): return v
case let .quantification(v): return v
case let .quote(v): return v
case let .trivia(v): return v
case let .atom(v): return v
case let .customCharacterClass(v): return v
case let .empty(v): return v

case let .groupTransform(g, _):
return g // FIXME: get this out of here
}
}

func filter(_ f: (AST) -> Bool) -> AST? {
func filt(_ children: [AST]) -> [AST] {
children.compactMap {
guard f($0) else { return nil }
return $0.filter(f)
}
/// If this node is a parent node, access its children.
///
/// Yields `nil` when the node's associated value does not conform to
/// `_ASTParent`.
public var children: [AST]? {
return (_associatedValue as? _ASTParent)?.children
}

/// The source range of this node, forwarded from its associated value.
public var sourceRange: SourceRange {
_associatedValue.sourceRange
}

/// Whether this node is "trivia" or non-semantic, like comments
public var isTrivia: Bool {
switch self {
case .trivia: return true
default: return false
}
func filt(_ cc: CustomCharacterClass) -> CustomCharacterClass {
CustomCharacterClass(cc.start, filt(cc.members))
}

/// Whether this node has nested somewhere inside it a capture.
///
/// A node counts if it is itself a group whose kind is capturing; otherwise
/// the check is applied recursively to `children`. A node with no children
/// (`children == nil`) that is not a capturing group yields `false`.
public var hasCapture: Bool {
// The node itself is a capturing group.
if case let .group(g) = self, g.kind.value.isCapturing {
return true
}

// Recurse into the children. NOTE(review): `any(_:)` is a helper defined
// elsewhere; presumably true when at least one element satisfies the
// predicate — confirm.
return self.children?.any(\.hasCapture) ?? false
}
}

// MARK: - AST types

extension AST {

public struct Alternation: Hashable, _ASTNode {
public let children: [AST]
public let sourceRange: SourceRange

public init(_ mems: [AST], _ sourceRange: SourceRange) {
self.children = mems
self.sourceRange = sourceRange
}
typealias CCCMember = CustomCharacterClass.Member
func filt(_ children: [CCCMember]) -> [CCCMember] {
children.compactMap {
switch $0 {
case let .custom(cc):
return .custom(filt(cc))
case .range(let lhs, let rhs):
guard let filtLHS = f(.atom(lhs)) ? lhs : nil else { return nil }
guard let filtRHS = f(.atom(rhs)) ? rhs : nil else { return nil }
return .range(filtLHS, filtRHS)
case let .atom(atom):
return f(.atom(atom)) ? .atom(atom) : nil
case let .setOperation(lhsMembers, op, rhsMembers):
return .setOperation(filt(lhsMembers), op, filt(rhsMembers))
}
}

public var _dumpBase: String { "alternation" }
}

public struct Concatenation: Hashable, _ASTNode {
public let children: [AST]
public let sourceRange: SourceRange

public init(_ mems: [AST], _ sourceRange: SourceRange) {
self.children = mems
self.sourceRange = sourceRange
}
switch self {
case let .alternation(children):
return .alternation(filt(children))

case let .concatenation(children):
return .concatenation(filt(children))
public var _dumpBase: String { "" }
}

case let .customCharacterClass(cc):
return .customCharacterClass(filt(cc))
public struct Quote: Hashable, _ASTNode {
public let literal: String
public let sourceRange: SourceRange

case let .group(g, child):
guard let c = child.filter(f) else { return nil }
return .group(g, c)
public init(_ s: String, _ sourceRange: SourceRange) {
self.literal = s
self.sourceRange = sourceRange
}

public var _dumpBase: String { "quote" }
}

case let .groupTransform(g, child, transform):
guard let c = child.filter(f) else { return nil }
return .groupTransform(g, c, transform: transform)
public struct Trivia: Hashable, _ASTNode {
// TODO: Contents of trivia, kinds, etc
public let sourceRange: SourceRange

case let .quantification(q, child):
guard let c = child.filter(f) else { return nil }
return .quantification(q, c)
public init(_ sourceRange: SourceRange) {
self.sourceRange = sourceRange
}

case .any, .trivia, .quote, .atom, .empty:
return f(self) ? self : nil
public var _dumpBase: String {
// TODO: comments, non-semantic whitespace, etc.
""
}
}

public var strippingTrivia: AST? {
filter(\.isSemantic)
/// Payload of the `.empty` AST case; it stores only a source range.
public struct Empty: Hashable, _ASTNode {
/// The source range recorded for this node.
public let sourceRange: SourceRange

/// Creates an empty node covering `sourceRange`.
public init(_ sourceRange: SourceRange) {
self.sourceRange = sourceRange
}

/// Dump label for this node; the empty string.
public var _dumpBase: String { "" }
}
}

// FIXME: Probably remove this from the AST

// FIXME: Get this out of here
public struct CaptureTransform: Equatable, Hashable, CustomStringConvertible {
public let closure: (Substring) -> Any

Expand Down
65 changes: 38 additions & 27 deletions Sources/_MatchingEngine/Regex/AST/ASTBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,28 @@ AST.

public let _fakeLoc = "".startIndex
public let _fakeRange = _fakeLoc ..< _fakeLoc
/// Wraps `t` in an `AST.Loc` whose source range is the placeholder
/// `_fakeRange`.
///
/// - Parameter t: The value to wrap.
/// - Returns: `t` paired with `_fakeRange`.
public func _fake<T: Hashable>(_ t: T) -> AST.Loc<T> {
.init(t, _fakeRange)
}

public func alt(_ asts: [AST]) -> AST {
.alternation(asts)
.alternation(.init(asts, _fakeRange))
}
/// Variadic convenience that forwards its arguments to `alt(_:)` taking
/// an `[AST]`.
public func alt(_ asts: AST...) -> AST {
alt(asts)
}

public func concat(_ asts: [AST]) -> AST {
.concatenation(asts)
.concatenation(.init(asts, _fakeRange))
}
/// Variadic convenience that forwards its arguments to `concat(_:)` taking
/// an `[AST]`.
public func concat(_ asts: AST...) -> AST {
concat(asts)
}

public func group(
_ kind: Group.Kind, _ child: AST
_ kind: AST.Group.Kind, _ child: AST
) -> AST {
.group(Group(kind, _fakeRange), child)
.group(.init(_fake(kind), child, _fakeRange))
}
public func capture(
_ child: AST
Expand All @@ -50,7 +53,7 @@ public func namedCapture(
_ name: String,
_ child: AST
) -> AST {
group(.namedCapture(name), child)
group(.namedCapture(_fake(name)), child)
}
public func nonCaptureReset(
_ child: AST
Expand All @@ -75,68 +78,71 @@ public func negativeLookbehind(_ child: AST) -> AST {
group(.negativeLookbehind, child)
}


/// Shorthand for the atom node `.atom(.any)`.
public var any: AST { .atom(.any) }

public func quant(
_ amount: Quantifier.Amount,
_ kind: Quantifier.Kind = .greedy,
_ amount: AST.Quantification.Amount,
_ kind: AST.Quantification.Kind = .greedy,
_ child: AST
) -> AST {
.quantification(Quantifier(amount, kind, _fakeRange), child)
.quantification(.init(
_fake(amount), _fake(kind), child, _fakeRange))
}
public func zeroOrMore(
_ kind: Quantifier.Kind = .greedy,
_ kind: AST.Quantification.Kind = .greedy,
_ child: AST
) -> AST {
quant(.zeroOrMore, kind, child)
}
public func zeroOrOne(
_ kind: Quantifier.Kind = .greedy,
_ kind: AST.Quantification.Kind = .greedy,
_ child: AST
) -> AST {
quant(.zeroOrOne, kind, child)
}
public func oneOrMore(
_ kind: Quantifier.Kind = .greedy,
_ kind: AST.Quantification.Kind = .greedy,
_ child: AST
) -> AST {
quant(.oneOrMore, kind, child)
}
public func exactly(
_ kind: Quantifier.Kind = .greedy,
_ kind: AST.Quantification.Kind = .greedy,
_ i: Int,
child: AST
_ child: AST
) -> AST {
quant(.exactly(i), kind, child)
quant(.exactly(_fake(i)), kind, child)
}
public func nOrMore(
_ kind: Quantifier.Kind = .greedy,
_ kind: AST.Quantification.Kind = .greedy,
_ i: Int,
child: AST
_ child: AST
) -> AST {
quant(.nOrMore(i), kind, child)
quant(.nOrMore(_fake(i)), kind, child)
}
public func upToN(
_ kind: Quantifier.Kind = .greedy,
_ kind: AST.Quantification.Kind = .greedy,
_ i: Int,
child: AST
_ child: AST
) -> AST {
quant(.upToN(i), kind, child)
quant(.upToN(_fake(i)), kind, child)
}
public func quantRange(
_ kind: Quantifier.Kind = .greedy,
_ kind: AST.Quantification.Kind = .greedy,
_ r: ClosedRange<Int>,
child: AST
_ child: AST
) -> AST {
quant(.range(r), kind, child)
let range = _fake(r.lowerBound) ... _fake(r.upperBound)
return quant(.range(range), kind, child)
}

public func charClass(
_ members: CustomCharacterClass.Member...,
inverted: Bool = false
) -> AST {
let cc = CustomCharacterClass(
inverted ? .inverted : .normal, members
inverted ? .inverted : .normal, members, _fakeRange
)
return .customCharacterClass(cc)
}
Expand All @@ -145,16 +151,21 @@ public func charClass(
inverted: Bool = false
) -> CustomCharacterClass.Member {
let cc = CustomCharacterClass(
inverted ? .inverted : .normal, members
inverted ? .inverted : .normal, members, _fakeRange
)
return .custom(cc)
}
public func posixSet(
_ set: Unicode.POSIXCharacterSet, inverted: Bool = false
) -> Atom {
return .namedSet(.init(inverted: inverted, set: set))
.namedSet(.init(inverted: inverted, set: set))
}

/// Builds a `.quote` node from `s`, using the placeholder `_fakeRange` as
/// its source range.
///
/// - Parameter s: The string passed to the quote node's initializer.
/// - Returns: The resulting `.quote` AST node.
public func quote(_ s: String) -> AST {
.quote(.init(s, _fakeRange))
}
func prop(

public func prop(
_ kind: Atom.CharacterProperty.Kind, inverted: Bool = false
) -> Atom {
return .property(.init(kind, isInverted: inverted))
Expand Down
Loading