Skip to content

Add assertions to the DSL #154

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions Sources/_StringProcessing/RegexDSL/Anchor.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

import _MatchingEngine

/// A regex DSL component describing an assertion, identified by a `Kind`
/// together with an inversion flag.
public struct Anchor {
  /// The assertion flavors an `Anchor` can represent.
  enum Kind {
    // Positions relative to the whole subject.
    case startOfSubject, endOfSubjectBeforeNewline, endOfSubject, firstMatchingPositionInSubject
    case textSegmentBoundary
    // Positions relative to a line.
    case startOfLine, endOfLine
    case wordBoundary
  }

  /// Which assertion this anchor stands for.
  var kind: Kind

  /// Whether the assertion is negated. Defaults to `false`.
  var isInverted: Bool = false
}

extension Anchor: RegexProtocol {
  /// The AST assertion kind corresponding to this anchor's `kind` and
  /// `isInverted` flag.
  ///
  /// Only `textSegmentBoundary` and `wordBoundary` have inverted forms; any
  /// other inverted anchor traps with `fatalError("Not yet supported")`.
  var astAssertion: AST.Atom.AssertionKind {
    switch (kind, isInverted) {
    // Plain (non-inverted) anchors.
    case (.startOfSubject, false):
      return .startOfSubject
    case (.endOfSubjectBeforeNewline, false):
      return .endOfSubjectBeforeNewline
    case (.endOfSubject, false):
      return .endOfSubject
    case (.firstMatchingPositionInSubject, false):
      return .firstMatchingPositionInSubject
    case (.textSegmentBoundary, false):
      return .textSegment
    case (.startOfLine, false):
      return .startOfLine
    case (.endOfLine, false):
      return .endOfLine
    case (.wordBoundary, false):
      return .wordBoundary

    // Inverted anchors that have a dedicated AST assertion.
    case (.textSegmentBoundary, true):
      return .notTextSegment
    case (.wordBoundary, true):
      return .notWordBoundary

    // Inverted anchors without an AST counterpart yet.
    case (.startOfSubject, true),
         (.endOfSubjectBeforeNewline, true),
         (.endOfSubject, true),
         (.firstMatchingPositionInSubject, true),
         (.startOfLine, true),
         (.endOfLine, true):
      fatalError("Not yet supported")
    }
  }

  /// A regex whose root node is an atom wrapping `astAssertion`.
  public var regex: Regex<Substring> {
    let assertion = astAssertion
    return Regex(node: .atom(.assertion(assertion)))
  }
}

// MARK: - Public API

extension Anchor {
  /// A non-inverted anchor of kind `startOfSubject`.
  public static var startOfSubject: Anchor { .init(kind: .startOfSubject) }

  /// A non-inverted anchor of kind `endOfSubjectBeforeNewline`.
  public static var endOfSubjectBeforeNewline: Anchor { .init(kind: .endOfSubjectBeforeNewline) }

  /// A non-inverted anchor of kind `endOfSubject`.
  public static var endOfSubject: Anchor { .init(kind: .endOfSubject) }

  // TODO: Are we supporting this?
  // public static var resetStartOfMatch: Anchor {
  // Anchor(kind: resetStartOfMatch)
  // }

  /// A non-inverted anchor of kind `firstMatchingPositionInSubject`.
  public static var firstMatchingPositionInSubject: Anchor { .init(kind: .firstMatchingPositionInSubject) }

  /// A non-inverted anchor of kind `textSegmentBoundary`.
  public static var textSegmentBoundary: Anchor { .init(kind: .textSegmentBoundary) }

  /// A non-inverted anchor of kind `startOfLine`.
  public static var startOfLine: Anchor { .init(kind: .startOfLine) }

  /// A non-inverted anchor of kind `endOfLine`.
  public static var endOfLine: Anchor { .init(kind: .endOfLine) }

  /// A non-inverted anchor of kind `wordBoundary`.
  public static var wordBoundary: Anchor { .init(kind: .wordBoundary) }

  /// A copy of this anchor with its inversion flag flipped.
  ///
  /// Only `textSegmentBoundary` and `wordBoundary` currently map to an
  /// inverted AST assertion; converting any other inverted anchor via
  /// `astAssertion` traps with "Not yet supported".
  public var inverted: Anchor {
    Anchor(kind: kind, isInverted: !isInverted)
  }
}

/// Builds a lookahead group around the regex produced by `content`.
///
/// - Parameters:
///   - negative: When `true`, the group kind is `.negativeLookahead`;
///     otherwise `.lookahead`. Defaults to `false`.
///   - content: A builder closure producing the component to wrap.
/// - Returns: A regex whose root is the group wrapping the content's root.
public func lookahead<R: RegexProtocol>(
  negative: Bool = false,
  @RegexBuilder _ content: () -> R
) -> Regex<R.Match> {
  let inner = content().regex.root
  if negative {
    return Regex(node: .group(.negativeLookahead, inner))
  }
  return Regex(node: .group(.lookahead, inner))
}

/// Builds a lookahead group around an existing regex component.
///
/// - Parameters:
///   - component: The component whose regex root is wrapped.
///   - negative: When `true`, the group kind is `.negativeLookahead`;
///     otherwise `.lookahead`. Defaults to `false`.
/// - Returns: A regex whose root is the group wrapping the component's root.
public func lookahead<R: RegexProtocol>(
  _ component: R,
  negative: Bool = false
) -> Regex<R.Match> {
  let wrapped = component.regex.root
  guard negative else {
    return Regex(node: .group(.lookahead, wrapped))
  }
  return Regex(node: .group(.negativeLookahead, wrapped))
}
27 changes: 27 additions & 0 deletions Tests/RegexTests/RegexDSLTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,33 @@ class RegexDSLTests: XCTestCase {
}
}
}

// Exercises `Anchor` and `lookahead` components inside the regex builder DSL.
// NOTE(review): each tuple appears to pair an input string with the expected
// match result (`nil` meaning "no match") — confirm against `_testDSLCaptures`.
func testAssertions() throws {
// Case 1: `"a".+` then "b", wrapped in start-of-line / end-of-line anchors.
// An extra leading character ("caaaaab") or trailing character ("aaaaabc")
// is expected to yield nil.
// (`.+` is presumably the DSL's one-or-more postfix operator — confirm.)
try _testDSLCaptures(
("aaaaab", "aaaaab"),
("caaaaab", nil),
("aaaaabc", nil),
captureType: Substring.self, ==)
{
Anchor.startOfLine
"a".+
"b"
Anchor.endOfLine
}

// Case 2: `"a".+`, then a positive lookahead for a digit, a negative
// lookahead for "2", and finally a word-character class.
// Only "aaaaa1" is expected to match; "aaaaa2", "aaaaa" and "aaaaab" are
// expected to yield nil.
try _testDSLCaptures(
("aaaaa1", "aaaaa1"),
("aaaaa2", nil),
("aaaaa", nil),
("aaaaab", nil),
captureType: Substring.self, ==)
{
"a".+
lookahead(CharacterClass.digit)
lookahead("2", negative: true)
CharacterClass.word
}
}

func testNestedGroups() throws {
try _testDSLCaptures(
Expand Down