Skip to content

[5.7] Obtain match output elements without materializing the output. #498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions Sources/_RegexParser/Utility/TypeConstruction.swift
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,62 @@ public enum TypeConstruction {
return _openExistential(childType, do: helper)
}
}

extension TypeConstruction {
  /// Returns `base` wrapped in `depth` levels of `Optional`.
  ///
  /// For example, `optionalType(of: Int.self, depth: 2)` returns `Int??.self`.
  ///
  /// - Parameters:
  ///   - base: The type to wrap.
  ///   - depth: The number of `Optional` layers to add. Must be non-negative;
  ///     `0` returns `base` unchanged. Defaults to `1`.
  /// - Returns: The metatype of `base` nested in `depth` optionals.
  public static func optionalType<Base>(
    of base: Base.Type, depth: Int = 1
  ) -> Any.Type {
    // A negative depth matches none of the explicit cases, so it would keep
    // recursing through `default` with an ever smaller depth until the stack
    // overflows. Trap with a clear message instead.
    precondition(depth >= 0, "Optional depth must be non-negative")
    switch depth {
    case 0: return base
    case 1: return Base?.self
    case 2: return Base??.self
    case 3: return Base???.self
    case 4: return Base????.self
    default:
      // Optional nesting has to be spelled statically, so wrap four levels at
      // a time and recurse for whatever depth remains.
      return optionalType(of: Base????.self, depth: depth - 4)
    }
  }
}

extension MemoryLayout {
  /// Returns the element index that corresponds to the given tuple element key
  /// path.
  ///
  /// The index is recovered by walking `elementTypes` in order, aligning a
  /// running offset for each element, and comparing it with the byte offset
  /// that `offset(of:)` reports for `keyPath`.
  ///
  /// - Parameters:
  ///   - keyPath: The key path from a tuple to one of its elements.
  ///   - elementTypes: The element types of the tuple type, in order.
  /// - Returns: The index of the element that starts at the key path's byte
  ///   offset, or `nil` if the key path has no byte offset, a zero-sized
  ///   element is reached before a match, or no element starts at that offset.
  // TODO: It is possible to get element types from the type metadata, but it's
  // more efficient to pass them in since we already know them in the matching
  // engine.
  public static func tupleElementIndex<ElementTypes: Collection>(
    of keyPath: PartialKeyPath<T>,
    elementTypes: ElementTypes
  ) -> Int? where ElementTypes.Element == Any.Type {
    guard let byteOffset = offset(of: keyPath) else {
      return nil
    }
    // With no element types to inspect, keep the long-standing answer:
    // offset 0 maps to index 0 and any other offset is unresolvable.
    guard !elementTypes.isEmpty else {
      return byteOffset == 0 ? 0 : nil
    }

    // Size and alignment mask of a single element type.
    func sizeAndAlignMask<Element>(_: Element.Type) -> (Int, Int) {
      (MemoryLayout<Element>.size, MemoryLayout<Element>.alignment - 1)
    }

    var currentOffset = 0
    for (index, elementType) in elementTypes.enumerated() {
      let (elementSize, alignMask) =
        _openExistential(elementType, do: sizeAndAlignMask)
      // The ABI of an offset-based key path only stores the byte offset, so
      // this doesn't work if there's a 0-sized element, e.g. `Void`,
      // `(Void, Void)`. (rdar://63819465)
      //
      // The check must use the *element's* size: an unqualified `size` here
      // would refer to `MemoryLayout<T>.size`, i.e. the size of the whole
      // tuple, and zero-sized elements would go undetected.
      if elementSize == 0 {
        return nil
      }
      // Align up the offset for this type.
      currentOffset = (currentOffset + alignMask) & ~alignMask
      // If it matches the offset we are looking for, `index` is the tuple
      // element index.
      if currentOffset == byteOffset {
        return index
      }
      // Advance to the past-the-end offset for this element.
      currentOffset += elementSize
    }
    return nil
  }
}
13 changes: 10 additions & 3 deletions Sources/_StringProcessing/Regex/AnyRegexOutput.swift
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@ extension AnyRegexOutput: RandomAccessCollection {

/// The captured value, `nil` for no-capture
public var value: Any? {
// FIXME: Should this return the substring for default-typed
// values?
representation.value
representation.value ?? substring
}

/// The type of this capture element, forwarded from its underlying
/// representation.
internal var type: Any.Type {
  get {
    return representation.type
  }
}

/// The name of this capture, if it has one, otherwise `nil`.
Expand Down Expand Up @@ -263,4 +265,9 @@ extension AnyRegexOutput.ElementRepresentation {
optionalCount: optionalDepth
)
}

/// The type of this element: the dynamic type of `value` when one is
/// present, otherwise `Substring` wrapped in `optionalDepth` levels of
/// `Optional`.
var type: Any.Type {
  if let captured = value {
    return Swift.type(of: captured)
  }
  return TypeConstruction.optionalType(
    of: Substring.self,
    depth: optionalDepth
  )
}
}
33 changes: 26 additions & 7 deletions Sources/_StringProcessing/Regex/Match.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ extension Regex {

@available(SwiftStdlib 5.7, *)
extension Regex.Match {
/// The input string of this match, forwarded from `anyRegexOutput`.
var input: String {
  get {
    return anyRegexOutput.input
  }
}

/// The output produced from the match operation.
public var output: Output {
if Output.self == AnyRegexOutput.self {
Expand All @@ -37,33 +41,48 @@ extension Regex.Match {
)

let output = AnyRegexOutput(
input: anyRegexOutput.input,
input: input,
elements: [wholeMatchCapture] + anyRegexOutput._elements
)

return output as! Output
} else if Output.self == Substring.self {
// FIXME: Plumb whole match (`.0`) through the matching engine.
return anyRegexOutput.input[range] as! Output
} else if anyRegexOutput.isEmpty, value != nil {
return input[range] as! Output
} else if anyRegexOutput.isEmpty, let value {
// FIXME: This is a workaround for whole-match values not
// being modeled as part of captures. We might want to
// switch to a model where results are alongside captures
return value! as! Output
return value as! Output
} else {
guard value == nil else {
fatalError("FIXME: what would this mean?")
}
let typeErasedMatch = anyRegexOutput.existentialOutput(
from: anyRegexOutput.input[range]
from: input[range]
)
return typeErasedMatch as! Output
}
}

/// The type of the whole-match element: the dynamic type of `value` when one
/// is present, otherwise `Substring`.
var wholeMatchType: Any.Type {
  guard let wholeMatchValue = value else {
    return Substring.self
  }
  return type(of: wholeMatchValue)
}

/// Accesses a capture by its name or number.
public subscript<T>(dynamicMember keyPath: KeyPath<Output, T>) -> T {
output[keyPath: keyPath]
// Note: We should be able to get the element offset from the key path
// itself even at compile time. We need a better way of doing this.
guard let outputTupleOffset = MemoryLayout.tupleElementIndex(
of: keyPath, elementTypes: [wholeMatchType] + anyRegexOutput.map(\.type)
) else {
return output[keyPath: keyPath]
}
if outputTupleOffset == 0 {
return value.map { $0 as! T } ?? (input[range] as! T)
} else {
return anyRegexOutput[outputTupleOffset - 1].value as! T
}
}

/// Accesses a capture using the `.0` syntax, even when the match isn't a tuple.
Expand All @@ -83,7 +102,7 @@ extension Regex.Match {
}

return element.existentialOutputComponent(
from: anyRegexOutput.input[...]
from: input[...]
) as! Capture
}
}
Expand Down
14 changes: 13 additions & 1 deletion Tests/RegexBuilderTests/RegexDSLTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,19 @@ class RegexDSLTests: XCTestCase {
CharacterClass.digit
}
}


try _testDSLCaptures(
("abcdef2", ("abcdef2", "f")),
matchType: (Substring, Substring??).self, ==)
{
Optionally {
ZeroOrMore {
Capture(CharacterClass.word)
}
CharacterClass.digit
}
}

try _testDSLCaptures(
("aaabbbcccdddeeefff", "aaabbbcccdddeeefff"),
("aaaabbbcccdddeeefff", nil),
Expand Down