-
Notifications
You must be signed in to change notification settings - Fork 50
Add assertions to the DSL #154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
2f825ff
38d61b3
091443b
2741126
31d0a05
c674db9
f629a4a
c970ab2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This source file is part of the Swift.org open source project | ||
// | ||
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors | ||
// Licensed under Apache License v2.0 with Runtime Library Exception | ||
// | ||
// See https://swift.org/LICENSE.txt for license information | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
import _MatchingEngine | ||
|
||
/// A regex DSL component describing an assertion: a `Kind` together with a | ||
/// flag recording whether that assertion is inverted. | ||
// NOTE(review): the non-lookahead kinds presumably mirror regex anchors such | ||
// as `^`, `$` and `\b` — confirm against `AST.Atom.AssertionKind`. | ||
public struct Assertion { | ||
/// The assertion varieties this type can represent. | ||
internal enum Kind { | ||
case startOfSubject | ||
case endOfSubjectBeforeNewline | ||
case endOfSubject | ||
case firstMatchingPositionInSubject | ||
case textSegmentBoundary | ||
case startOfLine | ||
case endOfLine | ||
case wordBoundary | ||
// Carries the DSL tree node that `regex` wraps in a `.lookahead` group | ||
// (or a `.negativeLookahead` group when the assertion is inverted). | ||
case lookahead(DSLTree.Node) | ||
} | ||
|
||
/// Which assertion this value represents. | ||
var kind: Kind | ||
/// Whether the assertion is inverted; defaults to `false` and is flipped by | ||
/// `inverted`. | ||
var isInverted: Bool = false | ||
} | ||
|
||
extension Assertion: RegexProtocol { | ||
var astAssertion: AST.Atom.AssertionKind? { | ||
if !isInverted { | ||
switch kind { | ||
case .startOfSubject: return .startOfSubject | ||
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline | ||
case .endOfSubject: return .endOfSubject | ||
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject | ||
case .textSegmentBoundary: return .textSegment | ||
case .startOfLine: return .startOfLine | ||
case .endOfLine: return .endOfLine | ||
case .wordBoundary: return .wordBoundary | ||
default: return nil | ||
} | ||
} else { | ||
switch kind { | ||
case .startOfSubject: fatalError("Not yet supported") | ||
case .endOfSubjectBeforeNewline: fatalError("Not yet supported") | ||
case .endOfSubject: fatalError("Not yet supported") | ||
case .firstMatchingPositionInSubject: fatalError("Not yet supported") | ||
case .textSegmentBoundary: return .notTextSegment | ||
case .startOfLine: fatalError("Not yet supported") | ||
case .endOfLine: fatalError("Not yet supported") | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't currently have a representation for these negated assertions in the AST, since things like […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are we going to an AST here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
|
||
case .wordBoundary: return .notWordBoundary | ||
default: return nil | ||
} | ||
} | ||
} | ||
|
||
/// The regex this assertion lowers to. | ||
/// When `astAssertion` provides a kind, the result is an atom assertion node; | ||
/// otherwise a lookahead becomes a `.lookahead` group, or a | ||
/// `.negativeLookahead` group when inverted. | ||
/// Traps if `astAssertion` hits one of its "Not yet supported" cases, or if no | ||
/// lowering exists for the kind ("Unsupported assertion"). | ||
public var regex: Regex<Substring> { | ||
// Prefer the direct AST assertion when one exists for this kind. | ||
if let assertionKind = astAssertion { | ||
return Regex(node: .atom(.assertion(assertionKind))) | ||
} | ||
|
||
// No AST assertion: only lookaheads are handled from here on. | ||
switch (kind, isInverted) { | ||
case let (.lookahead(node), false): | ||
return Regex(node: .group(.lookahead, node)) | ||
case let (.lookahead(node), true): | ||
return Regex(node: .group(.negativeLookahead, node)) | ||
|
||
// With the cases currently in `Kind`, `astAssertion` is nil only for | ||
// `.lookahead`, so this branch is a safety net. | ||
default: | ||
fatalError("Unsupported assertion") | ||
} | ||
} | ||
} | ||
|
||
// MARK: - Public API | ||
|
||
extension Assertion { | ||
/// An assertion of kind `.startOfSubject`. | ||
/// Its `inverted` form is not yet supported: evaluating `regex` on it traps | ||
/// with "Not yet supported". | ||
public static var startOfSubject: Assertion { | ||
Assertion(kind: .startOfSubject) | ||
} | ||
|
||
/// An assertion of kind `.endOfSubjectBeforeNewline`. | ||
/// Its `inverted` form is not yet supported: evaluating `regex` on it traps | ||
/// with "Not yet supported". | ||
public static var endOfSubjectBeforeNewline: Assertion { | ||
Assertion(kind: .endOfSubjectBeforeNewline) | ||
} | ||
|
||
/// An assertion of kind `.endOfSubject`. | ||
/// Its `inverted` form is not yet supported: evaluating `regex` on it traps | ||
/// with "Not yet supported". | ||
public static var endOfSubject: Assertion { | ||
Assertion(kind: .endOfSubject) | ||
} | ||
|
||
// TODO: Are we supporting this? | ||
// public static var resetStartOfMatch: Assertion { | ||
// Assertion(kind: resetStartOfMatch) | ||
// } | ||
|
||
/// An assertion of kind `.firstMatchingPositionInSubject`. | ||
/// Its `inverted` form is not yet supported: evaluating `regex` on it traps | ||
/// with "Not yet supported". | ||
public static var firstMatchingPositionInSubject: Assertion { | ||
Assertion(kind: .firstMatchingPositionInSubject) | ||
} | ||
|
||
/// An assertion of kind `.textSegmentBoundary`. | ||
/// Lowers to the AST's `.textSegment` assertion, or `.notTextSegment` when | ||
/// inverted. | ||
public static var textSegmentBoundary: Assertion { | ||
Assertion(kind: .textSegmentBoundary) | ||
} | ||
|
||
/// An assertion of kind `.startOfLine`. | ||
/// Its `inverted` form is not yet supported: evaluating `regex` on it traps | ||
/// with "Not yet supported". | ||
public static var startOfLine: Assertion { | ||
Assertion(kind: .startOfLine) | ||
} | ||
|
||
/// An assertion of kind `.endOfLine`. | ||
/// Its `inverted` form is not yet supported: evaluating `regex` on it traps | ||
/// with "Not yet supported". | ||
public static var endOfLine: Assertion { | ||
Assertion(kind: .endOfLine) | ||
} | ||
|
||
/// An assertion of kind `.wordBoundary`. | ||
/// Lowers to the AST's `.wordBoundary` assertion, or `.notWordBoundary` when | ||
/// inverted. | ||
public static var wordBoundary: Assertion { | ||
Assertion(kind: .wordBoundary) | ||
} | ||
|
||
/// A copy of this assertion with its `isInverted` flag flipped; the kind is | ||
/// unchanged, so inverting twice restores the original flag. | ||
public var inverted: Assertion { | ||
var result = self | ||
result.isInverted.toggle() | ||
return result | ||
} | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would we want an […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know that a property makes sense if we aren't going to also expose […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Everything in this PR can be inverted, just need a little more plumbing. If we want to provide the functionality of a "reset match" assertion, that could just be a separate function or type, since it isn't an anchor anyway. |
||
} | ||
|
||
// Lookahead factories: both overloads produce a non-inverted assertion of | ||
// kind `.lookahead`. | ||
extension Assertion { | ||
/// Creates a lookahead assertion from a `@RegexBuilder` closure. | ||
/// Invokes `content` once and forwards the result to `lookahead(_:)`. | ||
public static func lookahead<R: RegexProtocol>( | ||
@RegexBuilder _ content: () -> R | ||
) -> Assertion { | ||
lookahead(content()) | ||
} | ||
|
||
/// Creates a lookahead assertion from an existing regex component. | ||
/// Stores the root node of `component.regex` as the lookahead payload. | ||
public static func lookahead<R: RegexProtocol>(_ component: R) -> Assertion { | ||
Assertion(kind: .lookahead(component.regex.root)) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -180,6 +180,31 @@ class RegexDSLTests: XCTestCase { | |
} | ||
} | ||
} | ||
|
||
func testAssertions() throws { | ||
try _testDSLCaptures( | ||
("aaaaab", "aaaaab"), | ||
("caaaaab", nil), | ||
("aaaaabc", nil), | ||
captureType: Substring.self, ==) | ||
{ | ||
Assertion.startOfLine | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Many of the built-in ones are more commonly called "anchors", which might be worth considering too. |
||
"a".+ | ||
"b" | ||
Assertion.endOfLine | ||
} | ||
|
||
try _testDSLCaptures( | ||
("aaaaa1", "aaaaa1"), | ||
("aaaaa", nil), | ||
("aaaaab", nil), | ||
captureType: Substring.self, ==) | ||
{ | ||
"a".+ | ||
Assertion.lookahead(CharacterClass.digit) | ||
CharacterClass.word | ||
} | ||
} | ||
|
||
func testNestedGroups() throws { | ||
try _testDSLCaptures( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this meant to be a listing of built-in assertions, or are each of these the kinds of assertions someone could write?