Skip to content

Commit 29bc5da

Browse files
authored
Merge pull request swiftlang#309 from hamishknight/parser-changes-5.7
2 parents fc46753 + 771e735 commit 29bc5da

19 files changed

+720
-560
lines changed

Documentation/Evolution/RegexSyntaxRunTimeConstruction.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ UnicodeScalar -> '\u{' HexDigit{1...} '}'
339339
| '\o{' OctalDigit{1...} '}'
340340
| '\0' OctalDigit{0...3}
341341
342-
HexDigit -> [0-9a-zA-Z]
342+
HexDigit -> [0-9a-fA-F]
343343
OctalDigit -> [0-7]
344344
345345
NamedScalar -> '\N{' ScalarName '}'

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 44 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ extension AST {
7272

7373
// (*ACCEPT), (*FAIL), ...
7474
case backtrackingDirective(BacktrackingDirective)
75+
76+
// (?i), (?i-m), ...
77+
case changeMatchingOptions(MatchingOptionSequence)
7578
}
7679
}
7780
}
@@ -91,6 +94,7 @@ extension AST.Atom {
9194
case .subpattern(let v): return v
9295
case .callout(let v): return v
9396
case .backtrackingDirective(let v): return v
97+
case .changeMatchingOptions(let v): return v
9498
case .any: return nil
9599
case .startOfLine: return nil
96100
case .endOfLine: return nil
@@ -397,9 +401,6 @@ extension AST.Atom.CharacterProperty {
397401
/// Some special properties implemented by PCRE and Oniguruma.
398402
case pcreSpecial(PCRESpecialCategory)
399403
case onigurumaSpecial(OnigurumaSpecialProperty)
400-
401-
/// Unhandled properties.
402-
case other(key: String?, value: String)
403404
}
404405

405406
// TODO: erm, separate out or fold into something? splat it in?
@@ -631,6 +632,41 @@ extension AST.Atom {
631632
}
632633
}
633634

635+
extension AST.Atom.EscapedBuiltin {
636+
/// If the escape sequence represents a unicode scalar value, returns the
637+
/// value, otherwise `nil`.
///
/// Only `.alarm`, `.backspace`, `.escape`, `.formfeed`, `.newline`,
/// `.carriageReturn` and `.tab` yield a scalar; every other case listed in
/// the switch below returns `nil`.
638+
public var scalarValue: UnicodeScalar? {
639+
switch self {
640+
// TODO: Should we separate these into a separate enum? Or move the
641+
// specifics of the scalar to the DSL tree?
642+
case .alarm:
643+
// U+0007
return "\u{7}"
644+
case .backspace:
645+
// U+0008
return "\u{8}"
646+
case .escape:
647+
// U+001B
return "\u{1B}"
648+
case .formfeed:
649+
// U+000C
return "\u{C}"
650+
case .newline:
651+
return "\n"
652+
case .carriageReturn:
653+
return "\r"
654+
case .tab:
655+
return "\t"
656+
657+
// The remaining cases are spelled out explicitly rather than handled by a
// `default`, and none of them maps to a scalar value here.
case .singleDataUnit, .decimalDigit, .notDecimalDigit,
658+
.horizontalWhitespace, .notHorizontalWhitespace, .notNewline,
659+
.newlineSequence, .whitespace, .notWhitespace, .verticalTab,
660+
.notVerticalTab, .wordCharacter, .notWordCharacter, .graphemeCluster,
661+
.wordBoundary, .notWordBoundary, .startOfSubject,
662+
.endOfSubjectBeforeNewline, .endOfSubject,
663+
.firstMatchingPositionInSubject, .resetStartOfMatch, .trueAnychar,
664+
.textSegment, .notTextSegment:
665+
return nil
666+
}
667+
}
668+
}
669+
634670
extension AST.Atom {
635671
/// Retrieve the character value of the atom if it represents a literal
636672
/// character or unicode scalar, nil otherwise.
@@ -642,34 +678,7 @@ extension AST.Atom {
642678
return Character(s)
643679

644680
case .escaped(let c):
645-
switch c {
646-
// TODO: Should we separate these into a separate enum? Or move the
647-
// specifics of the scalar to the DSL tree?
648-
case .alarm:
649-
return "\u{7}"
650-
case .backspace:
651-
return "\u{8}"
652-
case .escape:
653-
return "\u{1B}"
654-
case .formfeed:
655-
return "\u{C}"
656-
case .newline:
657-
return "\n"
658-
case .carriageReturn:
659-
return "\r"
660-
case .tab:
661-
return "\t"
662-
663-
case .singleDataUnit, .decimalDigit, .notDecimalDigit,
664-
.horizontalWhitespace, .notHorizontalWhitespace, .notNewline,
665-
.newlineSequence, .whitespace, .notWhitespace, .verticalTab,
666-
.notVerticalTab, .wordCharacter, .notWordCharacter, .graphemeCluster,
667-
.wordBoundary, .notWordBoundary, .startOfSubject,
668-
.endOfSubjectBeforeNewline, .endOfSubject,
669-
.firstMatchingPositionInSubject, .resetStartOfMatch, .trueAnychar,
670-
.textSegment, .notTextSegment:
671-
return nil
672-
}
681+
return c.scalarValue.map(Character.init)
673682

674683
case .keyboardControl, .keyboardMeta, .keyboardMetaControl:
675684
// TODO: These should have unicode scalar values.
@@ -683,7 +692,7 @@ extension AST.Atom {
683692
return nil
684693

685694
case .property, .any, .startOfLine, .endOfLine, .backreference, .subpattern,
686-
.callout, .backtrackingDirective:
695+
.callout, .backtrackingDirective, .changeMatchingOptions:
687696
return nil
688697
}
689698
}
@@ -723,7 +732,7 @@ extension AST.Atom {
723732

724733
case .property, .escaped, .any, .startOfLine, .endOfLine,
725734
.backreference, .subpattern, .namedCharacter, .callout,
726-
.backtrackingDirective:
735+
.backtrackingDirective, .changeMatchingOptions:
727736
return nil
728737
}
729738
}
@@ -732,6 +741,8 @@ extension AST.Atom {
732741
switch kind {
733742
case .backtrackingDirective(let b):
734743
return b.isQuantifiable
744+
case .changeMatchingOptions:
745+
return false
735746
// TODO: Are callouts quantifiable?
736747
default:
737748
return true

Sources/_RegexParser/Regex/AST/CustomCharClass.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,18 @@ extension CustomCC.Member {
9797
if case .trivia = self { return true }
9898
return false
9999
}
100+
101+
/// The logical negation of `isTrivia`: `true` exactly when this member is
/// not trivia.
public var isSemantic: Bool {
102+
!isTrivia
103+
}
100104
}
101105

102106
extension AST.CustomCharacterClass {
103107
/// Strip trivia from the character class members. This does not recurse into
104108
/// nested custom character classes.
105109
public var strippingTriviaShallow: Self {
106110
var copy = self
107-
copy.members = copy.members.filter { !$0.isTrivia }
111+
copy.members = copy.members.filter(\.isSemantic)
108112
return copy
109113
}
110114
}

Sources/_RegexParser/Regex/AST/Group.swift

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,7 @@ extension AST {
6868
case atomicScriptRun
6969

7070
// (?iJmnsUxxxDPSWy{..}-iJmnsUxxxDPSW:)
71-
// Isolated options are written as e.g (?i), and implicitly form a group
72-
// containing all the following elements of the current group.
73-
case changeMatchingOptions(MatchingOptionSequence, isIsolated: Bool)
71+
case changeMatchingOptions(MatchingOptionSequence)
7472

7573
// NOTE: Comments appear to be groups, but are not parsed
7674
// the same. They parse more like quotes, so are not
@@ -87,21 +85,6 @@ extension AST.Group.Kind {
8785
}
8886
}
8987

90-
/// Whether this is a group with an implicit scope, e.g isolated matching
91-
/// options implicitly become parent groups for the rest of the elements in
92-
/// the current group:
93-
///
94-
/// (a(?i)bc)de -> (a(?i:bc))de
95-
///
96-
public var hasImplicitScope: Bool {
97-
switch self {
98-
case .changeMatchingOptions(_, let isIsolated):
99-
return isIsolated
100-
default:
101-
return false
102-
}
103-
}
104-
10588
/// If this is a named group, its name, `nil` otherwise.
10689
public var name: String? {
10790
switch self {

Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -397,8 +397,9 @@ extension Source {
397397
return .pcreSpecial(pcreSpecial)
398398
}
399399

400-
// Otherwise we don't know what this is.
401-
return .other(key: nil, value: value)
400+
// TODO: This should be versioned, and do we want a more lax behavior for
401+
// the runtime?
402+
throw ParseError.unknownProperty(key: nil, value: value)
402403
}
403404

404405
static func classifyCharacterProperty(
@@ -435,6 +436,8 @@ extension Source {
435436
if let match = match {
436437
return match
437438
}
438-
return .other(key: key, value: value)
439+
// TODO: This should be versioned, and do we want a more lax behavior for
440+
// the runtime?
441+
throw ParseError.unknownProperty(key: key, value: value)
439442
}
440443
}

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ enum ParseError: Error, Hashable {
5757
case expectedCustomCharacterClassMembers
5858
case invalidCharacterClassRangeOperand
5959

60-
case invalidPOSIXSetName(String)
6160
case emptyProperty
61+
case unknownProperty(key: String?, value: String)
6262

6363
case expectedGroupSpecifier
6464
case unbalancedEndOfGroup
@@ -142,10 +142,13 @@ extension ParseError: CustomStringConvertible {
142142
return "expected custom character class members"
143143
case .invalidCharacterClassRangeOperand:
144144
return "invalid character class range"
145-
case let .invalidPOSIXSetName(n):
146-
return "invalid character set name: '\(n)'"
147145
case .emptyProperty:
148146
return "empty property"
147+
case .unknownProperty(let key, let value):
148+
if let key = key {
149+
return "unknown character property '\(key)=\(value)'"
150+
}
151+
return "unknown character property '\(value)'"
149152
case .expectedGroupSpecifier:
150153
return "expected group specifier"
151154
case .unbalancedEndOfGroup:

0 commit comments

Comments
 (0)