Skip to content

Commit 3ff4620

Browse files
committed
Merge branch 'main' into sendable
2 parents 46d7cde + dcf7dd8 commit 3ff4620

38 files changed

+1860
-631
lines changed

Package.swift

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,13 @@ let package = Package(
105105
"_RegexParser",
106106
"_StringProcessing"
107107
]),
108+
.executableTarget(
109+
name: "RegexTester",
110+
dependencies: [
111+
.product(name: "ArgumentParser", package: "swift-argument-parser"),
112+
"_RegexParser",
113+
"_StringProcessing"
114+
]),
108115

109116
// MARK: Exercises
110117
.target(

Sources/RegexBuilder/Algorithms.swift

Lines changed: 70 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12-
import _StringProcessing
12+
@_spi(RegexBuilder) import _StringProcessing
1313

1414
// FIXME(rdar://92459215): We should be using 'some RegexComponent' instead of
1515
// <R: RegexComponent> for the methods below that don't impose any additional
@@ -23,9 +23,9 @@ extension BidirectionalCollection where SubSequence == Substring {
2323
/// - Parameter content: A closure that returns a regex to match against.
2424
/// - Returns: The match if there is one, or `nil` if none.
2525
@available(SwiftStdlib 5.7, *)
26-
public func wholeMatch<R: RegexComponent>(
27-
@RegexComponentBuilder of content: () -> R
28-
) -> Regex<R.RegexOutput>.Match? {
26+
public func wholeMatch<Output>(
27+
@RegexComponentBuilder of content: () -> some RegexComponent<Output>
28+
) -> Regex<Output>.Match? {
2929
wholeMatch(of: content())
3030
}
3131

@@ -35,9 +35,9 @@ extension BidirectionalCollection where SubSequence == Substring {
3535
/// - Parameter content: A closure that returns a regex to match against.
3636
/// - Returns: The match if there is one, or `nil` if none.
3737
@available(SwiftStdlib 5.7, *)
38-
public func prefixMatch<R: RegexComponent>(
39-
@RegexComponentBuilder of content: () -> R
40-
) -> Regex<R.RegexOutput>.Match? {
38+
public func prefixMatch<Output>(
39+
@RegexComponentBuilder of content: () -> some RegexComponent<Output>
40+
) -> Regex<Output>.Match? {
4141
prefixMatch(of: content())
4242
}
4343

@@ -49,8 +49,8 @@ extension BidirectionalCollection where SubSequence == Substring {
4949
/// - Returns: `true` if the regex returned by `content` matched anywhere in
5050
/// this collection, otherwise `false`.
5151
@available(SwiftStdlib 5.7, *)
52-
public func contains<R: RegexComponent>(
53-
@RegexComponentBuilder _ content: () -> R
52+
public func contains(
53+
@RegexComponentBuilder _ content: () -> some RegexComponent
5454
) -> Bool {
5555
contains(content())
5656
}
@@ -63,8 +63,8 @@ extension BidirectionalCollection where SubSequence == Substring {
6363
/// match of the regex returned by `content`. Returns `nil` if no match
6464
/// for the regex is found.
6565
@available(SwiftStdlib 5.7, *)
66-
public func firstRange<R: RegexComponent>(
67-
@RegexComponentBuilder of content: () -> R
66+
public func firstRange(
67+
@RegexComponentBuilder of content: () -> some RegexComponent
6868
) -> Range<Index>? {
6969
firstRange(of: content())
7070
}
@@ -78,8 +78,8 @@ extension BidirectionalCollection where SubSequence == Substring {
7878
/// `content`. Returns an empty collection if no match for the regex
7979
/// is found.
8080
@available(SwiftStdlib 5.7, *)
81-
public func ranges<R: RegexComponent>(
82-
@RegexComponentBuilder of content: () -> R
81+
public func ranges(
82+
@RegexComponentBuilder of content: () -> some RegexComponent
8383
) -> [Range<Index>] {
8484
ranges(of: content())
8585
}
@@ -99,10 +99,10 @@ extension BidirectionalCollection where SubSequence == Substring {
9999
/// - Returns: A collection of substrings, split from this collection's
100100
/// elements.
101101
@available(SwiftStdlib 5.7, *)
102-
public func split<R: RegexComponent>(
102+
public func split(
103103
maxSplits: Int = Int.max,
104104
omittingEmptySubsequences: Bool = true,
105-
@RegexComponentBuilder separator: () -> R
105+
@RegexComponentBuilder separator: () -> some RegexComponent
106106
) -> [SubSequence] {
107107
split(separator: separator(), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
108108
}
@@ -115,8 +115,8 @@ extension BidirectionalCollection where SubSequence == Substring {
115115
/// - Returns: `true` if the initial elements of this collection match
116116
/// the regex returned by `content`; otherwise, `false`.
117117
@available(SwiftStdlib 5.7, *)
118-
public func starts<R: RegexComponent>(
119-
@RegexComponentBuilder with content: () -> R
118+
public func starts(
119+
@RegexComponentBuilder with content: () -> some RegexComponent
120120
) -> Bool {
121121
starts(with: content())
122122
}
@@ -132,8 +132,8 @@ extension BidirectionalCollection where SubSequence == Substring {
132132
/// the start of the collection, the entire contents of this collection
133133
/// are returned.
134134
@available(SwiftStdlib 5.7, *)
135-
public func trimmingPrefix<R: RegexComponent>(
136-
@RegexComponentBuilder _ content: () -> R
135+
public func trimmingPrefix(
136+
@RegexComponentBuilder _ content: () -> some RegexComponent
137137
) -> SubSequence {
138138
trimmingPrefix(content())
139139
}
@@ -145,9 +145,9 @@ extension BidirectionalCollection where SubSequence == Substring {
145145
/// - Returns: The first match for the regex created by `content` in this
146146
/// collection, or `nil` if no match is found.
147147
@available(SwiftStdlib 5.7, *)
148-
public func firstMatch<R: RegexComponent>(
149-
@RegexComponentBuilder of content: () -> R
150-
) -> Regex<R.RegexOutput>.Match? {
148+
public func firstMatch<Output>(
149+
@RegexComponentBuilder of content: () -> some RegexComponent<Output>
150+
) -> Regex<Output>.Match? {
151151
firstMatch(of: content())
152152
}
153153

@@ -159,9 +159,9 @@ extension BidirectionalCollection where SubSequence == Substring {
159159
/// - Returns: A collection of matches for the regex returned by `content`.
160160
/// If no matches are found, the returned collection is empty.
161161
@available(SwiftStdlib 5.7, *)
162-
public func matches<R: RegexComponent>(
163-
@RegexComponentBuilder of content: () -> R
164-
) -> [Regex<R.RegexOutput>.Match] {
162+
public func matches<Output>(
163+
@RegexComponentBuilder of content: () -> some RegexComponent<Output>
164+
) -> [Regex<Output>.Match] {
165165
matches(of: content())
166166
}
167167
}
@@ -175,8 +175,8 @@ where Self: BidirectionalCollection, SubSequence == Substring {
175175
/// - Parameter content: A closure that returns the regex to search for
176176
/// at the start of this collection.
177177
@available(SwiftStdlib 5.7, *)
178-
public mutating func trimPrefix<R: RegexComponent>(
179-
@RegexComponentBuilder _ content: () -> R
178+
public mutating func trimPrefix(
179+
@RegexComponentBuilder _ content: () -> some RegexComponent
180180
) {
181181
trimPrefix(content())
182182
}
@@ -196,11 +196,11 @@ where Self: BidirectionalCollection, SubSequence == Substring {
196196
/// - Returns: A new collection in which all matches for regex in `subrange`
197197
/// are replaced by `replacement`, using `content` to create the regex.
198198
@available(SwiftStdlib 5.7, *)
199-
public func replacing<R: RegexComponent, Replacement: Collection>(
199+
public func replacing<Replacement: Collection>(
200200
with replacement: Replacement,
201201
subrange: Range<Index>,
202202
maxReplacements: Int = .max,
203-
@RegexComponentBuilder content: () -> R
203+
@RegexComponentBuilder content: () -> some RegexComponent
204204
) -> Self where Replacement.Element == Element {
205205
replacing(content(), with: replacement, subrange: subrange, maxReplacements: maxReplacements)
206206
}
@@ -218,10 +218,10 @@ where Self: BidirectionalCollection, SubSequence == Substring {
218218
/// - Returns: A new collection in which all matches for the regex
219219
/// are replaced by `replacement`, using `content` to create the regex.
220220
@available(SwiftStdlib 5.7, *)
221-
public func replacing<R: RegexComponent, Replacement: Collection>(
221+
public func replacing<Replacement: Collection>(
222222
with replacement: Replacement,
223223
maxReplacements: Int = .max,
224-
@RegexComponentBuilder content: () -> R
224+
@RegexComponentBuilder content: () -> some RegexComponent
225225
) -> Self where Replacement.Element == Element {
226226
replacing(content(), with: replacement, maxReplacements: maxReplacements)
227227
}
@@ -237,10 +237,10 @@ where Self: BidirectionalCollection, SubSequence == Substring {
237237
/// - content: A closure that returns the regex to search for
238238
/// and replace.
239239
@available(SwiftStdlib 5.7, *)
240-
public mutating func replace<R: RegexComponent, Replacement: Collection>(
240+
public mutating func replace<Replacement: Collection>(
241241
with replacement: Replacement,
242242
maxReplacements: Int = .max,
243-
@RegexComponentBuilder content: () -> R
243+
@RegexComponentBuilder content: () -> some RegexComponent
244244
) where Replacement.Element == Element {
245245
replace(content(), with: replacement, maxReplacements: maxReplacements)
246246
}
@@ -262,11 +262,11 @@ where Self: BidirectionalCollection, SubSequence == Substring {
262262
/// are replaced by the result of calling `replacement`, where regex
263263
/// is the result of calling `content`.
264264
@available(SwiftStdlib 5.7, *)
265-
public func replacing<R: RegexComponent, Replacement: Collection>(
265+
public func replacing<Output, Replacement: Collection>(
266266
subrange: Range<Index>,
267267
maxReplacements: Int = .max,
268-
@RegexComponentBuilder content: () -> R,
269-
with replacement: (Regex<R.RegexOutput>.Match) throws -> Replacement
268+
@RegexComponentBuilder content: () -> some RegexComponent<Output>,
269+
with replacement: (Regex<Output>.Match) throws -> Replacement
270270
) rethrows -> Self where Replacement.Element == Element {
271271
try replacing(content(), subrange: subrange, maxReplacements: maxReplacements, with: replacement)
272272
}
@@ -286,10 +286,10 @@ where Self: BidirectionalCollection, SubSequence == Substring {
286286
/// are replaced by the result of calling `replacement`, where regex is
287287
/// the result of calling `content`.
288288
@available(SwiftStdlib 5.7, *)
289-
public func replacing<R: RegexComponent, Replacement: Collection>(
289+
public func replacing<Output, Replacement: Collection>(
290290
maxReplacements: Int = .max,
291-
@RegexComponentBuilder content: () -> R,
292-
with replacement: (Regex<R.RegexOutput>.Match) throws -> Replacement
291+
@RegexComponentBuilder content: () -> some RegexComponent<Output>,
292+
with replacement: (Regex<Output>.Match) throws -> Replacement
293293
) rethrows -> Self where Replacement.Element == Element {
294294
try replacing(content(), maxReplacements: maxReplacements, with: replacement)
295295
}
@@ -305,11 +305,39 @@ where Self: BidirectionalCollection, SubSequence == Substring {
305305
/// - replacement: A closure that receives the full match information,
306306
/// including captures, and returns a replacement collection.
307307
@available(SwiftStdlib 5.7, *)
308-
public mutating func replace<R: RegexComponent, Replacement: Collection>(
308+
public mutating func replace<Output, Replacement: Collection>(
309309
maxReplacements: Int = .max,
310-
@RegexComponentBuilder content: () -> R,
311-
with replacement: (Regex<R.RegexOutput>.Match) throws -> Replacement
310+
@RegexComponentBuilder content: () -> some RegexComponent<Output>,
311+
with replacement: (Regex<Output>.Match) throws -> Replacement
312312
) rethrows where Replacement.Element == Element {
313313
try replace(content(), maxReplacements: maxReplacements, with: replacement)
314314
}
315315
}
316+
317+
// String split overload breakers
318+
319+
extension StringProtocol where SubSequence == Substring {
320+
@available(SwiftStdlib 5.7, *)
321+
public func split(
322+
separator: String,
323+
maxSplits: Int = .max,
324+
omittingEmptySubsequences: Bool = true
325+
) -> [Substring] {
326+
return _split(
327+
separator: separator,
328+
maxSplits: maxSplits,
329+
omittingEmptySubsequences: omittingEmptySubsequences)
330+
}
331+
332+
@available(SwiftStdlib 5.7, *)
333+
public func split(
334+
separator: Substring,
335+
maxSplits: Int = .max,
336+
omittingEmptySubsequences: Bool = true
337+
) -> [Substring] {
338+
return _split(
339+
separator: separator,
340+
maxSplits: maxSplits,
341+
omittingEmptySubsequences: omittingEmptySubsequences)
342+
}
343+
}

Sources/RegexTester/RegexTester.swift

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
import ArgumentParser
13+
import _RegexParser
14+
import _StringProcessing
15+
16+
@main
17+
@available(macOS 9999, *)
18+
struct RegexTester: ParsableCommand {
19+
typealias MatchFunctionType = (String) throws -> Regex<AnyRegexOutput>.Match?
20+
21+
@Argument(help: "The regex pattern to test.")
22+
var pattern: String
23+
24+
@Argument(help: "One or more input strings to test against <pattern>.")
25+
var inputs: [String]
26+
27+
@Flag(
28+
name: [.customShort("p"), .customLong("partial")],
29+
help: "Allow partial matches.")
30+
var allowPartialMatch: Bool = false
31+
32+
mutating func run() throws {
33+
print("Using pattern \(pattern.halfWidthCornerQuoted)")
34+
let regex = try Regex(pattern)
35+
36+
for input in inputs {
37+
print("Input \(input.halfWidthCornerQuoted)")
38+
39+
let matchFunc: MatchFunctionType = allowPartialMatch
40+
? regex.firstMatch(in:)
41+
: regex.wholeMatch(in:)
42+
43+
if let result = try matchFunc(input) {
44+
print(" matched: \(result.0.halfWidthCornerQuoted)")
45+
} else {
46+
print(" no match")
47+
}
48+
}
49+
}
50+
}

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -441,28 +441,55 @@ extension AST.Atom.CharacterProperty {
441441

442442
/// Character age, as per UnicodeScalar.Properties.age.
443443
case age(major: Int, minor: Int)
444-
444+
445+
/// A block property.
446+
case block(Unicode.Block)
447+
445448
case posix(Unicode.POSIXProperty)
446449

447450
/// Some special properties implemented by PCRE and Oniguruma.
448451
case pcreSpecial(PCRESpecialCategory)
449-
case onigurumaSpecial(OnigurumaSpecialProperty)
450-
452+
453+
/// Some special properties implemented by Java.
454+
case javaSpecial(JavaSpecial)
455+
451456
public enum MapKind: Hashable, Sendable {
452457
case lowercase
453458
case uppercase
454459
case titlecase
455460
}
456461
}
457462

458-
// TODO: erm, separate out or fold into something? splat it in?
459463
public enum PCRESpecialCategory: String, Hashable, Sendable {
460464
case alphanumeric = "Xan"
461465
case posixSpace = "Xps"
462466
case perlSpace = "Xsp"
463467
case universallyNamed = "Xuc"
464468
case perlWord = "Xwd"
465469
}
470+
471+
/// Special Java properties that correspond to methods on
472+
/// `java.lang.Character`, with the `java` prefix replaced by `is`.
473+
public enum JavaSpecial: String, Hashable, CaseIterable, Sendable {
474+
case alphabetic = "javaAlphabetic"
475+
case defined = "javaDefined"
476+
case digit = "javaDigit"
477+
case identifierIgnorable = "javaIdentifierIgnorable"
478+
case ideographic = "javaIdeographic"
479+
case isoControl = "javaISOControl"
480+
case javaIdentifierPart = "javaJavaIdentifierPart" // not a typo, that's actually the name
481+
case javaIdentifierStart = "javaJavaIdentifierStart" // not a typo, that's actually the name
482+
case javaLetter = "javaLetter"
483+
case javaLetterOrDigit = "javaLetterOrDigit"
484+
case lowerCase = "javaLowerCase"
485+
case mirrored = "javaMirrored"
486+
case spaceChar = "javaSpaceChar"
487+
case titleCase = "javaTitleCase"
488+
case unicodeIdentifierPart = "javaUnicodeIdentifierPart"
489+
case unicodeIdentifierStart = "javaUnicodeIdentifierStart"
490+
case upperCase = "javaUpperCase"
491+
case whitespace = "javaWhitespace"
492+
}
466493
}
467494

468495
extension AST.Atom {

0 commit comments

Comments
 (0)