Skip to content

Commit 2cf8817

Browse files
authored
Merge pull request #40717 from rxwei/regex-match
2 parents 493a8c5 + e5175d5 commit 2cf8817

File tree

11 files changed

+107
-42
lines changed

11 files changed

+107
-42
lines changed

include/swift/AST/ASTContext.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,12 @@ class ASTContext final {
368368
/// i.e. true if the entry is [key: alias_name, value: (real_name, true)].
369369
mutable llvm::DenseMap<Identifier, std::pair<Identifier, bool>> ModuleAliasMap;
370370

371+
/// The maximum arity of `_StringProcessing.Tuple{n}`.
372+
static constexpr unsigned StringProcessingTupleDeclMaxArity = 8;
373+
/// Cached `_StringProcessing.Tuple{n}` declarations.
374+
mutable SmallVector<StructDecl *, StringProcessingTupleDeclMaxArity - 2>
375+
StringProcessingTupleDecls;
376+
371377
/// Retrieve the allocator for the given arena.
372378
llvm::BumpPtrAllocator &
373379
getAllocator(AllocationArena arena = AllocationArena::Permanent) const;
@@ -623,7 +629,15 @@ class ASTContext final {
623629

624630
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
625631
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
626-
632+
633+
/// Retrieve the max arity that `_StringProcessing.Tuple{arity}` was
634+
/// instantiated for.
635+
unsigned getStringProcessingTupleDeclMaxArity() const;
636+
637+
/// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given
638+
/// arity.
639+
StructDecl *getStringProcessingTupleDecl(unsigned arity) const;
640+
627641
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
628642
FuncDecl *getLessThanIntDecl() const;
629643

include/swift/AST/DiagnosticsSema.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4772,6 +4772,9 @@ ERROR(string_processing_lib_missing,none,
47724772
ERROR(regex_capture_types_failed_to_decode,none,
47734773
"failed to decode capture types for regular expression literal; this may "
47744774
"be a compiler bug", ())
4775+
ERROR(regex_too_many_captures,none,
4776+
"too many captures in regular expression literal; the current limit is "
4777+
"%0", (unsigned))
47754778

47764779
//------------------------------------------------------------------------------
47774780
// MARK: Type Check Types

lib/AST/ASTContext.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1235,7 +1235,30 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
12351235
return ConcreteDeclRef(foundDecl, subs);
12361236
}
12371237

1238-
static
1238+
unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const {
1239+
return StringProcessingTupleDeclMaxArity;
1240+
}
1241+
1242+
StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const {
1243+
assert(arity >= 2);
1244+
if (arity > StringProcessingTupleDeclMaxArity)
1245+
return nullptr;
1246+
if (StringProcessingTupleDecls.empty())
1247+
StringProcessingTupleDecls.append(
1248+
StringProcessingTupleDeclMaxArity - 1, nullptr);
1249+
auto &decl = StringProcessingTupleDecls[arity - 2];
1250+
if (decl)
1251+
return decl;
1252+
SmallVector<ValueDecl *, 1> results;
1253+
auto *spModule = getLoadedModule(Id_StringProcessing);
1254+
auto typeName = getIdentifier("Tuple" + llvm::utostr(arity));
1255+
spModule->lookupQualified(
1256+
spModule, DeclNameRef(typeName), NL_OnlyTypes, results);
1257+
assert(results.size() == 1);
1258+
return (decl = cast<StructDecl>(results[0]));
1259+
}
1260+
1261+
static
12391262
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
12401263
FuncDecl *&cached) {
12411264
if (cached)

lib/Sema/CSGen.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,19 +1267,28 @@ namespace {
12671267
ctx.Id_Regex.str());
12681268
return Type();
12691269
}
1270-
SmallVector<TupleTypeElt, 4> captureTypes;
1270+
SmallVector<Type, 4> matchTypes {ctx.getSubstringType()};
12711271
if (decodeRegexCaptureTypes(ctx,
12721272
E->getSerializedCaptureStructure(),
12731273
/*atomType*/ ctx.getSubstringType(),
1274-
captureTypes)) {
1274+
matchTypes)) {
12751275
ctx.Diags.diagnose(E->getLoc(),
12761276
diag::regex_capture_types_failed_to_decode);
12771277
return Type();
12781278
}
1279-
auto genericArg = captureTypes.size() == 1
1280-
? captureTypes[0].getRawType()
1281-
: TupleType::get(captureTypes, ctx);
1282-
return BoundGenericStructType::get(regexDecl, Type(), {genericArg});
1279+
if (matchTypes.size() == 1)
1280+
return BoundGenericStructType::get(
1281+
regexDecl, Type(), matchTypes.front());
1282+
// Form a `_StringProcessing.Tuple{n}<...>`.
1283+
auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size());
1284+
if (!tupleDecl) {
1285+
ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures,
1286+
ctx.getStringProcessingTupleDeclMaxArity() - 1);
1287+
return Type();
1288+
}
1289+
auto matchType = BoundGenericStructType::get(
1290+
tupleDecl, Type(), matchTypes);
1291+
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
12831292
}
12841293

12851294
Type visitDeclRefExpr(DeclRefExpr *E) {

lib/Sema/TypeCheckRegex.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,14 @@ using namespace swift;
2626
// 〚`name: T` (atom)〛 ==> .atom, `name`, '\0'
2727
// 〚`[T]`〛 ==> 〚`T`〛, .formArray
2828
// 〚`T?`〛 ==> 〚`T`〛, .formOptional
29+
// 〚`(T0, T1, ...)` (top level)〛 ==> 〚`T0`〛, 〚`T1`〛, ...
2930
// 〚`(T0, T1, ...)`〛 ==> .beginTuple, 〚`T0`〛, 〚`T1`〛, ..., .endTuple
3031
//
3132
// For details, see apple/swift-experimental-string-processing.
3233
bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
3334
ArrayRef<uint8_t> serialization,
3435
Type atomType,
35-
SmallVectorImpl<TupleTypeElt> &result) {
36+
SmallVectorImpl<Type> &result) {
3637
using Version = RegexLiteralExpr::CaptureStructureSerializationVersion;
3738
static const Version implVersion = 1;
3839
unsigned size = serialization.size();
@@ -45,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
4546
if (version != implVersion)
4647
return true;
4748
// Read contents.
48-
SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
49+
SmallVector<SmallVector<Type, 4>, 4> scopes(1);
4950
unsigned offset = sizeof(Version);
5051
auto consumeCode = [&]() -> Optional<RegexCaptureStructureCode> {
5152
auto rawValue = serialization[offset];
@@ -72,26 +73,34 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
7273
if (length >= size - offset)
7374
return true; // Unterminated string.
7475
StringRef name(namePtr, length);
75-
scopes.back().push_back(TupleTypeElt(atomType, ctx.getIdentifier(name)));
76+
// The name is currently unused because we are forming a nominal
77+
// `Tuple{n}` type. We will switch back to native tuples once variadic
78+
// generics are available.
79+
(void)name;
80+
scopes.back().push_back(atomType);
7681
offset += length + /*NUL*/ 1;
7782
break;
7883
}
7984
case RegexCaptureStructureCode::FormArray: {
8085
auto &type = scopes.back().back();
81-
type = TupleTypeElt(ArraySliceType::get(type.getRawType()));
86+
type = ArraySliceType::get(type);
8287
break;
8388
}
8489
case RegexCaptureStructureCode::FormOptional: {
8590
auto &type = scopes.back().back();
86-
type = TupleTypeElt(OptionalType::get(type.getRawType()));
91+
type = OptionalType::get(type);
8792
break;
8893
}
8994
case RegexCaptureStructureCode::BeginTuple:
9095
scopes.push_back({});
9196
break;
9297
case RegexCaptureStructureCode::EndTuple: {
9398
auto children = scopes.pop_back_val();
94-
scopes.back().push_back(TupleType::get(children, ctx));
99+
if (children.size() > ctx.getStringProcessingTupleDeclMaxArity())
100+
return true;
101+
auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size());
102+
auto type = BoundGenericStructType::get(tupleDecl, Type(), children);
103+
scopes.back().push_back(type);
95104
break;
96105
}
97106
case RegexCaptureStructureCode::CaseCount:

lib/Sema/TypeCheckRegex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t {
4040
bool decodeRegexCaptureTypes(ASTContext &ctx,
4141
llvm::ArrayRef<uint8_t> serialization,
4242
Type atomType,
43-
llvm::SmallVectorImpl<TupleTypeElt> &result);
43+
llvm::SmallVectorImpl<Type> &result);
4444

4545
} // end namespace swift
4646

test/StringProcessing/Parse/regex_parse_end_of_buffer.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing
2-
// REQUIRES: libswift
2+
// REQUIRES: swift_in_compiler
33

44
// Note there is purposefully no trailing newline here.
55
// expected-error@+1 {{unterminated regex literal}}

test/StringProcessing/Runtime/regex_basic.swift

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// RUN: %target-run-simple-swift(-Xfrontend -enable-experimental-string-processing)
22

3-
// REQUIRES: libswift,string_processing,executable_test
3+
// REQUIRES: swift_in_compiler,string_processing,executable_test
44

55
import StdlibUnittest
66

@@ -25,19 +25,19 @@ RegexBasicTests.test("Basic") {
2525

2626
let match1 = input.expectMatch('/aabcc./')
2727
expectEqual("aabccd", input[match1.range])
28-
expectTrue(() == match1.captures)
28+
expectTrue("aabccd" == match1.match)
2929

3030
let match2 = input.expectMatch('/a*b.+./')
3131
expectEqual("aabccd", input[match2.range])
32-
expectTrue(() == match2.captures)
32+
expectTrue("aabccd" == match2.match)
3333
}
3434

3535
RegexBasicTests.test("Modern") {
3636
let input = "aabccd"
3737

3838
let match1 = input.expectMatch('|a a bc c /*hello*/ .|')
3939
expectEqual("aabccd", input[match1.range])
40-
expectTrue(() == match1.captures)
40+
expectTrue("aabccd" == match1.match)
4141
}
4242

4343
RegexBasicTests.test("Captures") {
@@ -47,12 +47,14 @@ RegexBasicTests.test("Captures") {
4747
"""
4848
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
4949
// Test inferred type.
50-
let _: Regex<(Substring, Substring?, Substring)>.Type = type(of: regex)
50+
let _: Regex<Tuple4<Substring, Substring, Substring?, Substring>>.Type
51+
= type(of: regex)
5152
let match1 = input.expectMatch(regex)
5253
expectEqual(input[...], input[match1.range])
53-
expectTrue("A6F0" == match1.captures.0)
54-
expectTrue("A6F1" == match1.captures.1)
55-
expectTrue("Extend" == match1.captures.2)
54+
expectTrue(input == match1.0)
55+
expectTrue("A6F0" == match1.1)
56+
expectTrue("A6F1" == match1.2)
57+
expectTrue("Extend" == match1.3)
5658
}
5759

5860
runAllTests()

test/StringProcessing/SILGen/regex_literal_silgen.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ var s = '/abc/'
1010
// CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int
1111
// CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]]
1212

13-
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
14-
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
13+
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
14+
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>

test/StringProcessing/Sema/regex_literal_type_inference.swift

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,44 +2,49 @@
22
// REQUIRES: swift_in_compiler
33

44
let r0 = '/./'
5-
let _: Regex<()> = r0
5+
let _: Regex<Substring> = r0
66

77
func takesRegex<Match>(_: Regex<Match>) {}
88
takesRegex('//') // okay
99

1010
let r1 = '/.(.)/'
1111
// Note: We test its type with a separate statement so that we know the type
1212
// checker inferred the regex's type independently without contextual types.
13-
let _: Regex<Substring>.Type = type(of: r1)
13+
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r1)
1414

15-
struct S {}
16-
// expected-error @+2 {{cannot assign value of type 'Regex<Substring>' to type 'Regex<S>'}}
17-
// expected-note @+1 {{arguments to generic parameter 'Capture' ('Substring' and 'S') are expected to be equal}}
15+
struct S: MatchProtocol {
16+
typealias Capture = Substring
17+
}
18+
// expected-error @+2 {{cannot assign value of type 'Regex<Tuple2<Substring, Substring>>' to type 'Regex<S>'}}
19+
// expected-note @+1 {{arguments to generic parameter 'Match' ('Tuple2<Substring, Substring>' and 'S') are expected to be equal}}
1820
let r2: Regex<S> = '/.(.)/'
1921

2022
let r3 = '/(.)(.)/'
21-
let _: Regex<(Substring, Substring)>.Type = type(of: r3)
23+
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r3)
2224

2325
let r4 = '/(?<label>.)(.)/'
24-
let _: Regex<(label: Substring, Substring)>.Type = type(of: r4)
26+
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r4)
2527

2628
let r5 = '/(.(.(.)))/'
27-
let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r5)
29+
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r5)
2830

2931
let r6 = '/(?'we'.(?'are'.(?'regex'.)))/'
30-
let _: Regex<(we: Substring, are: Substring, regex: Substring)>.Type = type(of: r6)
32+
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r6)
3133

3234
let r7 = '/(?:(?:(.(.(.)*)?))*?)?/'
3335
// ^ 1
3436
// ^ 2
3537
// ^ 3
36-
let _: Regex<([Substring]?, [Substring?]?, [[Substring]?]?)>.Type = type(of: r7)
38+
let _: Regex<Tuple4<Substring, [Substring]?, [Substring?]?, [[Substring]?]?>>.Type = type(of: r7)
3739

3840
let r8 = '/well(?<theres_no_single_element_tuple_what_can_we>do)/'
39-
let _: Regex<Substring>.Type = type(of: r8)
41+
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r8)
4042

4143
let r9 = '/(a)|(b)|(c)|d/'
42-
let _: Regex<(Substring?, Substring?, Substring?)>.Type = type(of: r9)
44+
let _: Regex<Tuple4<Substring, Substring?, Substring?, Substring?>>.Type = type(of: r9)
4345

4446
let r10 = '/(a)|b/'
45-
let _: Regex<Substring?>.Type = type(of: r10)
47+
let _: Regex<Tuple2<Substring, Substring?>>.Type = type(of: r10)
48+
49+
// expected-error @+1 {{too many captures in regular expression literal; the current limit is 7}}
50+
let r11 = '/()()()()()()()()/' // 8 captures, too many for our prototype

utils/update_checkout/update-checkout-config.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@
123123
"swift-cmark-gfm": "gfm",
124124
"swift-nio": "2.31.2",
125125
"swift-nio-ssl": "2.15.0",
126-
"swift-experimental-string-processing": "dev/4"
126+
"swift-experimental-string-processing": "dev/5"
127127
}
128128
},
129129
"rebranch": {
@@ -157,7 +157,7 @@
157157
"sourcekit-lsp": "main",
158158
"swift-format": "main",
159159
"swift-installer-scripts": "main",
160-
"swift-experimental-string-processing": "dev/4"
160+
"swift-experimental-string-processing": "dev/5"
161161
}
162162
},
163163
"release/5.6": {
@@ -308,7 +308,7 @@
308308
"sourcekit-lsp": "main",
309309
"swift-format": "main",
310310
"swift-installer-scripts": "main",
311-
"swift-experimental-string-processing": "dev/4"
311+
"swift-experimental-string-processing": "dev/5"
312312
}
313313
},
314314
"release/5.4": {

0 commit comments

Comments
 (0)