Skip to content

Update regex literal delimiters #41430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1959,8 +1959,6 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
}

bool Lexer::tryLexRegexLiteral(const char *TokStart) {
assert(*TokStart == '\'');

// We need to have experimental string processing enabled, and have the
// parsing logic for regex literals available.
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
Expand Down Expand Up @@ -1995,7 +1993,6 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {

// Otherwise, we either had a successful lex, or something that was
// recoverable.
assert(ErrStr || CurPtr[-1] == '\'');
formToken(tok::regex_literal, TokStart);
return true;
}
Expand Down Expand Up @@ -2471,8 +2468,16 @@ void Lexer::lexImpl() {
case '\\': return formToken(tok::backslash, TokStart);

case '#':
// Try to lex a raw string literal.
if (unsigned CustomDelimiterLen = advanceIfCustomDelimiter(CurPtr, Diags))
return lexStringLiteral(CustomDelimiterLen);

// If we have experimental string processing enabled, try to lex a regex
// literal.
if (tryLexRegexLiteral(TokStart))
return;

// Otherwise, try to lex a magic pound literal.
return lexHash();

// Operator characters.
Expand Down Expand Up @@ -2525,13 +2530,20 @@ void Lexer::lexImpl() {
case '&': case '|': case '^': case '~': case '.':
return lexOperatorIdentifier();

case 'r':
// If we have experimental string processing enabled, try to lex a regex
// literal.
if (tryLexRegexLiteral(TokStart))
return;
LLVM_FALLTHROUGH;

case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
case 'V': case 'W': case 'X': case 'Y': case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
case 'o': case 'p': case 'q': /*r above*/ case 's': case 't': case 'u':
case 'v': case 'w': case 'x': case 'y': case 'z':
case '_':
return lexIdentifier();
Expand All @@ -2544,14 +2556,6 @@ void Lexer::lexImpl() {
return lexNumber();

case '\'':
// If we have experimental string processing enabled, and have the parsing
// logic for regex literals, try to lex a single quoted string as a regex
// literal.
if (tryLexRegexLiteral(TokStart))
return;

// Otherwise lex as a string literal and emit a diagnostic.
LLVM_FALLTHROUGH;
case '"':
return lexStringLiteral();

Expand Down
22 changes: 17 additions & 5 deletions test/StringProcessing/Parse/regex.swift
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing
// REQUIRES: swift_in_compiler

_ = '/abc/'
_ = #/abc/#
_ = #|abc|#
_ = re'abc'

_ = ('/[*/', '/+]/', '/.]/')
func foo<T>(_ x: T...) {}
foo(#/abc/#, #|abc|#, re'abc')

let arr = [#/abc/#, #|abc|#, re'abc']

_ = #/\w+/#.self
_ = #|\w+|#.self
_ = re'\w+'.self

_ = #/#/\/\#\\/#
_ = #|#|\|\#\\|#
_ = re're\r\e\'\\'

_ = (#/[*/#, #/+]/#, #/.]/#)
// expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}}
// expected-error@-2 {{cannot parse regular expression: expected ']'}}

_ = '/\w+/'
_ = '/\'\\/'
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

// Note there is purposefully no trailing newline here.
// expected-error@+1 {{unterminated regex literal}}
var unterminated = '/xy
var unterminated = #/xy
7 changes: 5 additions & 2 deletions test/StringProcessing/Parse/regex_parse_error.swift
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing
// REQUIRES: swift_in_compiler

let s = '/\\/''/ // expected-error {{unterminated regex literal}}
let s = #/\\/''/ // expected-error {{unterminated regex literal}}
_ = #|\| // expected-error {{unterminated regex literal}}
_ = #// // expected-error {{unterminated regex literal}}
_ = re'x // expected-error {{unterminated regex literal}}

// expected-error@+1 {{unterminated regex literal}}
var unterminated = '/xy
var unterminated = #/xy
8 changes: 4 additions & 4 deletions test/StringProcessing/Runtime/regex_basic.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@ extension String {
RegexBasicTests.test("Basic") {
let input = "aabccd"

let match1 = input.expectMatch('/aabcc./')
let match1 = input.expectMatch(#/aabcc./#)
expectEqual("aabccd", input[match1.range])
expectTrue("aabccd" == match1.match)

let match2 = input.expectMatch('/a*b.+./')
let match2 = input.expectMatch(#/a*b.+./#)
expectEqual("aabccd", input[match2.range])
expectTrue("aabccd" == match2.match)
}

RegexBasicTests.test("Modern") {
let input = "aabccd"

let match1 = input.expectMatch('|a a bc c /*hello*/ .|')
let match1 = input.expectMatch(#|a a bc c /*hello*/ .|#)
expectEqual("aabccd", input[match1.range])
expectTrue("aabccd" == match1.match)
}
Expand All @@ -45,7 +45,7 @@ RegexBasicTests.test("Captures") {
A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM \
COMBINING MARK TUKWENTIS
"""
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
let regex = #/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/#
// Test inferred type.
let _: Regex<(Substring, Substring, Substring?, Substring)>.Type
= type(of: regex)
Expand Down
4 changes: 2 additions & 2 deletions test/StringProcessing/SILGen/regex_literal_silgen.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// RUN: %target-swift-frontend -emit-silgen -enable-experimental-string-processing %s | %FileCheck %s
// REQUIRES: swift_in_compiler

var s = '/abc/'
// CHECK: [[REGEX_STR_LITERAL:%[0-9]+]] = string_literal utf8 "'/abc/'"
var s = #/abc/#
// CHECK: [[REGEX_STR_LITERAL:%[0-9]+]] = string_literal utf8 "#/abc/#"
// CHECK: [[STRING_INIT:%[0-9]+]] = function_ref @$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC : $@convention(method) (Builtin.RawPointer, Builtin.Word, Builtin.Int1, @thin String.Type) -> @owned String
// CHECK: [[REGEX_STR:%[0-9]+]] = apply [[STRING_INIT]]([[REGEX_STR_LITERAL]]

Expand Down
30 changes: 15 additions & 15 deletions test/StringProcessing/Sema/regex_literal_type_inference.swift
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing
// REQUIRES: swift_in_compiler

let r0 = '/./'
let r0 = #/./#
let _: Regex<Substring> = r0

func takesRegex<Match>(_: Regex<Match>) {}
takesRegex('//') // okay
takesRegex(#//#) // okay

let r1 = '/.(.)/'
let r1 = #/.(.)/#
// Note: We test its type with a separate statement so that we know the type
// checker inferred the regex's type independently without contextual types.
let _: Regex<(Substring, Substring)>.Type = type(of: r1)

struct S {}
// expected-error @+2 {{cannot assign value of type 'Regex<(Substring, Substring)>' to type 'Regex<S>'}}
// expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, Substring)' and 'S') are expected to be equal}}
let r2: Regex<S> = '/.(.)/'
let r2: Regex<S> = #/.(.)/#

let r3 = '/(.)(.)/'
let r3 = #/(.)(.)/#
let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r3)

let r4 = '/(?<label>.)(.)/'
let r4 = #/(?<label>.)(.)/#
let _: Regex<(Substring, label: Substring, Substring)>.Type = type(of: r4)

let r5 = '/(.(.(.)))/'
let r5 = #/(.(.(.)))/#
let _: Regex<(Substring, Substring, Substring, Substring)>.Type = type(of: r5)

let r6 = '/(?'we'.(?'are'.(?'regex'.)+)?)/'
let _: Regex<(Substring, we: Substring, are: Substring?, regex: [Substring]?)>.Type = type(of: r6)
let r6 = #/(?'we'.(?'are'.(?'regex'.)+)?)/#
let _: Regex<(Substring, we: Substring, are: Substring?, regex: Substring?)>.Type = type(of: r6)

let r7 = '/(?:(?:(.(.(.)*)?))*?)?/'
let r7 = #/(?:(?:(.(.(.)*)?))*?)?/#
// ^ 1
// ^ 2
// ^ 3
let _: Regex<(Substring, [Substring]?, [Substring?]?, [[Substring]?]?)>.Type = type(of: r7)
let _: Regex<(Substring, Substring??, Substring???, Substring????)>.Type = type(of: r7)

let r8 = '/well(?<theres_no_single_element_tuple_what_can_we>do)/'
let r8 = #/well(?<theres_no_single_element_tuple_what_can_we>do)/#
let _: Regex<(Substring, theres_no_single_element_tuple_what_can_we: Substring)>.Type = type(of: r8)

let r9 = '/(a)|(b)|(c)|d/'
let r9 = #/(a)|(b)|(c)|d/#
let _: Regex<(Substring, Substring?, Substring?, Substring?)>.Type = type(of: r9)

let r10 = '/(a)|b/'
let r10 = #/(a)|b/#
let _: Regex<(Substring, Substring?)>.Type = type(of: r10)

let r11 = '/()()()()()()()()/'
let r11 = #/()()()()()()()()/#
let _: Regex<(Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring)>.Type = type(of: r11)
6 changes: 3 additions & 3 deletions utils/update_checkout/update-checkout-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@
"swift-cmark-gfm": "gfm",
"swift-nio": "2.31.2",
"swift-nio-ssl": "2.15.0",
"swift-experimental-string-processing": "dev/6"
"swift-experimental-string-processing": "dev/8"
}
},
"rebranch": {
Expand Down Expand Up @@ -157,7 +157,7 @@
"sourcekit-lsp": "main",
"swift-format": "main",
"swift-installer-scripts": "main",
"swift-experimental-string-processing": "dev/6"
"swift-experimental-string-processing": "dev/8"
}
},
"release/5.6": {
Expand Down Expand Up @@ -308,7 +308,7 @@
"sourcekit-lsp": "main",
"swift-format": "main",
"swift-installer-scripts": "main",
"swift-experimental-string-processing": "dev/6"
"swift-experimental-string-processing": "dev/8"
}
},
"release/5.4": {
Expand Down