Skip to content

Commit 7179a8d

Browse files
authored
Merge pull request #41275 from rxwei/regex-update-dev-6
[Regex] Switch regex match to Swift tuples.
2 parents a825773 + cf8e0fe commit 7179a8d

File tree

12 files changed

+41
-94
lines changed

12 files changed

+41
-94
lines changed

include/swift/AST/ASTContext.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -367,12 +367,6 @@ class ASTContext final {
367367
/// i.e. true if the entry is [key: alias_name, value: (real_name, true)].
368368
mutable llvm::DenseMap<Identifier, std::pair<Identifier, bool>> ModuleAliasMap;
369369

370-
/// The maximum arity of `_StringProcessing.Tuple{n}`.
371-
static constexpr unsigned StringProcessingTupleDeclMaxArity = 8;
372-
/// Cached `_StringProcessing.Tuple{n}` declarations.
373-
mutable SmallVector<StructDecl *, StringProcessingTupleDeclMaxArity - 2>
374-
StringProcessingTupleDecls;
375-
376370
/// Retrieve the allocator for the given arena.
377371
llvm::BumpPtrAllocator &
378372
getAllocator(AllocationArena arena = AllocationArena::Permanent) const;
@@ -629,14 +623,6 @@ class ASTContext final {
629623
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
630624
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
631625

632-
/// Retrieve the max arity that `_StringProcessing.Tuple{arity}` was
633-
/// instantiated for.
634-
unsigned getStringProcessingTupleDeclMaxArity() const;
635-
636-
/// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given
637-
/// arity.
638-
StructDecl *getStringProcessingTupleDecl(unsigned arity) const;
639-
640626
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
641627
FuncDecl *getLessThanIntDecl() const;
642628

include/swift/AST/DiagnosticsSema.def

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4832,9 +4832,6 @@ ERROR(string_processing_lib_missing,none,
48324832
ERROR(regex_capture_types_failed_to_decode,none,
48334833
"failed to decode capture types for regular expression literal; this may "
48344834
"be a compiler bug", ())
4835-
ERROR(regex_too_many_captures,none,
4836-
"too many captures in regular expression literal; the current limit is "
4837-
"%0", (unsigned))
48384835

48394836
//------------------------------------------------------------------------------
48404837
// MARK: Type Check Types

include/swift/AST/KnownSDKTypes.def

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,5 @@ KNOWN_SDK_TYPE_DECL(Distributed, RemoteCallTarget, StructDecl, 0)
4949

5050
// String processing
5151
KNOWN_SDK_TYPE_DECL(StringProcessing, Regex, StructDecl, 1)
52-
KNOWN_SDK_TYPE_DECL(StringProcessing, DynamicCaptures, EnumDecl, 0)
5352

5453
#undef KNOWN_SDK_TYPE_DECL

lib/AST/ASTContext.cpp

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,29 +1225,6 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
12251225
return ConcreteDeclRef(foundDecl, subs);
12261226
}
12271227

1228-
unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const {
1229-
return StringProcessingTupleDeclMaxArity;
1230-
}
1231-
1232-
StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const {
1233-
assert(arity >= 2);
1234-
if (arity > StringProcessingTupleDeclMaxArity)
1235-
return nullptr;
1236-
if (StringProcessingTupleDecls.empty())
1237-
StringProcessingTupleDecls.append(
1238-
StringProcessingTupleDeclMaxArity - 1, nullptr);
1239-
auto &decl = StringProcessingTupleDecls[arity - 2];
1240-
if (decl)
1241-
return decl;
1242-
SmallVector<ValueDecl *, 1> results;
1243-
auto *spModule = getLoadedModule(Id_StringProcessing);
1244-
auto typeName = getIdentifier("Tuple" + llvm::utostr(arity));
1245-
spModule->lookupQualified(
1246-
spModule, DeclNameRef(typeName), NL_OnlyTypes, results);
1247-
assert(results.size() == 1);
1248-
return (decl = cast<StructDecl>(results[0]));
1249-
}
1250-
12511228
static
12521229
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
12531230
FuncDecl *&cached) {

lib/Sema/CSGen.cpp

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,27 +1267,20 @@ namespace {
12671267
ctx.Id_Regex.str());
12681268
return Type();
12691269
}
1270-
SmallVector<Type, 4> matchTypes {ctx.getSubstringType()};
1270+
SmallVector<TupleTypeElt, 4> matchElements {ctx.getSubstringType()};
12711271
if (decodeRegexCaptureTypes(ctx,
12721272
E->getSerializedCaptureStructure(),
12731273
/*atomType*/ ctx.getSubstringType(),
1274-
matchTypes)) {
1274+
matchElements)) {
12751275
ctx.Diags.diagnose(E->getLoc(),
12761276
diag::regex_capture_types_failed_to_decode);
12771277
return Type();
12781278
}
1279-
if (matchTypes.size() == 1)
1279+
if (matchElements.size() == 1)
12801280
return BoundGenericStructType::get(
1281-
regexDecl, Type(), matchTypes.front());
1282-
// Form a `_StringProcessing.Tuple{n}<...>`.
1283-
auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size());
1284-
if (!tupleDecl) {
1285-
ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures,
1286-
ctx.getStringProcessingTupleDeclMaxArity() - 1);
1287-
return Type();
1288-
}
1289-
auto matchType = BoundGenericStructType::get(
1290-
tupleDecl, Type(), matchTypes);
1281+
regexDecl, Type(), matchElements.front().getType());
1282+
// Form a tuple.
1283+
auto matchType = TupleType::get(matchElements, ctx);
12911284
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
12921285
}
12931286

lib/Sema/TypeCheckRegex.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ using namespace swift;
3333
bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
3434
ArrayRef<uint8_t> serialization,
3535
Type atomType,
36-
SmallVectorImpl<Type> &result) {
36+
SmallVectorImpl<TupleTypeElt> &result) {
3737
using Version = RegexLiteralExpr::CaptureStructureSerializationVersion;
3838
static const Version implVersion = 1;
3939
unsigned size = serialization.size();
@@ -46,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
4646
if (version != implVersion)
4747
return true;
4848
// Read contents.
49-
SmallVector<SmallVector<Type, 4>, 4> scopes(1);
49+
SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
5050
unsigned offset = sizeof(Version);
5151
auto consumeCode = [&]() -> Optional<RegexCaptureStructureCode> {
5252
auto rawValue = serialization[offset];
@@ -73,33 +73,29 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
7373
if (length >= size - offset)
7474
return true; // Unterminated string.
7575
StringRef name(namePtr, length);
76-
// The name is currently unused because we are forming a nominal
77-
// `Tuple{n}` type. We will switch back to native tuples when there are
78-
// variadic generics.
79-
(void)name;
80-
scopes.back().push_back(atomType);
76+
scopes.back().push_back(
77+
TupleTypeElt(atomType, ctx.getIdentifier(name)));
8178
offset += length + /*NUL*/ 1;
8279
break;
8380
}
8481
case RegexCaptureStructureCode::FormArray: {
85-
auto &type = scopes.back().back();
86-
type = ArraySliceType::get(type);
82+
auto &element = scopes.back().back();
83+
element = TupleTypeElt(ArraySliceType::get(element.getType()),
84+
element.getName());
8785
break;
8886
}
8987
case RegexCaptureStructureCode::FormOptional: {
90-
auto &type = scopes.back().back();
91-
type = OptionalType::get(type);
88+
auto &element = scopes.back().back();
89+
element = TupleTypeElt(OptionalType::get(element.getType()),
90+
element.getName());
9291
break;
9392
}
9493
case RegexCaptureStructureCode::BeginTuple:
9594
scopes.push_back({});
9695
break;
9796
case RegexCaptureStructureCode::EndTuple: {
9897
auto children = scopes.pop_back_val();
99-
if (children.size() > ctx.getStringProcessingTupleDeclMaxArity())
100-
return true;
101-
auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size());
102-
auto type = BoundGenericStructType::get(tupleDecl, Type(), children);
98+
auto type = TupleType::get(children, ctx);
10399
scopes.back().push_back(type);
104100
break;
105101
}

lib/Sema/TypeCheckRegex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t {
4040
bool decodeRegexCaptureTypes(ASTContext &ctx,
4141
llvm::ArrayRef<uint8_t> serialization,
4242
Type atomType,
43-
llvm::SmallVectorImpl<Type> &result);
43+
llvm::SmallVectorImpl<TupleTypeElt> &result);
4444

4545
} // end namespace swift
4646

test/StringProcessing/Parse/regex.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
_ = '/abc/'
55

66
_ = ('/[*/', '/+]/', '/.]/')
7-
// expected-error@-1 {{cannot parse regular expression}}
7+
// expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}}
8+
// expected-error@-2 {{cannot parse regular expression: expected ']'}}
89

910
_ = '/\w+/'
1011
_ = '/\'\\/'

test/StringProcessing/Runtime/regex_basic.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ RegexBasicTests.test("Captures") {
4747
"""
4848
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
4949
// Test inferred type.
50-
let _: Regex<Tuple4<Substring, Substring, Substring?, Substring>>.Type
50+
let _: Regex<(Substring, Substring, Substring?, Substring)>.Type
5151
= type(of: regex)
5252
let match1 = input.expectMatch(regex)
5353
expectEqual(input[...], input[match1.range])

test/StringProcessing/SILGen/regex_literal_silgen.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ var s = '/abc/'
1010
// CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int
1111
// CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]]
1212

13-
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
14-
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
13+
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
14+
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>

test/StringProcessing/Sema/regex_literal_type_inference.swift

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,41 +10,39 @@ takesRegex('//') // okay
1010
let r1 = '/.(.)/'
1111
// Note: We test its type with a separate statement so that we know the type
1212
// checker inferred the regex's type independently without contextual types.
13-
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r1)
13+
let _: Regex<(Substring, Substring)>.Type = type(of: r1)
1414

15-
struct S: MatchProtocol {
16-
typealias Capture = Substring
17-
}
18-
// expected-error @+2 {{cannot assign value of type 'Regex<Tuple2<Substring, Substring>>' to type 'Regex<S>'}}
19-
// expected-note @+1 {{arguments to generic parameter 'Match' ('Tuple2<Substring, Substring>' and 'S') are expected to be equal}}
15+
struct S {}
16+
// expected-error @+2 {{cannot assign value of type 'Regex<(Substring, Substring)>' to type 'Regex<S>'}}
17+
// expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, Substring)' and 'S') are expected to be equal}}
2018
let r2: Regex<S> = '/.(.)/'
2119

2220
let r3 = '/(.)(.)/'
23-
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r3)
21+
let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r3)
2422

2523
let r4 = '/(?<label>.)(.)/'
26-
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r4)
24+
let _: Regex<(Substring, label: Substring, Substring)>.Type = type(of: r4)
2725

2826
let r5 = '/(.(.(.)))/'
29-
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r5)
27+
let _: Regex<(Substring, Substring, Substring, Substring)>.Type = type(of: r5)
3028

31-
let r6 = '/(?'we'.(?'are'.(?'regex'.)))/'
32-
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r6)
29+
let r6 = '/(?'we'.(?'are'.(?'regex'.)+)?)/'
30+
let _: Regex<(Substring, we: Substring, are: Substring?, regex: [Substring]?)>.Type = type(of: r6)
3331

3432
let r7 = '/(?:(?:(.(.(.)*)?))*?)?/'
3533
// ^ 1
3634
// ^ 2
3735
// ^ 3
38-
let _: Regex<Tuple4<Substring, [Substring]?, [Substring?]?, [[Substring]?]?>>.Type = type(of: r7)
36+
let _: Regex<(Substring, [Substring]?, [Substring?]?, [[Substring]?]?)>.Type = type(of: r7)
3937

4038
let r8 = '/well(?<theres_no_single_element_tuple_what_can_we>do)/'
41-
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r8)
39+
let _: Regex<(Substring, theres_no_single_element_tuple_what_can_we: Substring)>.Type = type(of: r8)
4240

4341
let r9 = '/(a)|(b)|(c)|d/'
44-
let _: Regex<Tuple4<Substring, Substring?, Substring?, Substring?>>.Type = type(of: r9)
42+
let _: Regex<(Substring, Substring?, Substring?, Substring?)>.Type = type(of: r9)
4543

4644
let r10 = '/(a)|b/'
47-
let _: Regex<Tuple2<Substring, Substring?>>.Type = type(of: r10)
45+
let _: Regex<(Substring, Substring?)>.Type = type(of: r10)
4846

49-
// expected-error @+1 {{too many captures in regular expression literal; the current limit is 7}}
50-
let r11 = '/()()()()()()()()/' // 8 captures, too many for our prototype
47+
let r11 = '/()()()()()()()()/'
48+
let _: Regex<(Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring)>.Type = type(of: r11)

utils/update_checkout/update-checkout-config.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@
123123
"swift-cmark-gfm": "gfm",
124124
"swift-nio": "2.31.2",
125125
"swift-nio-ssl": "2.15.0",
126-
"swift-experimental-string-processing": "dev/5"
126+
"swift-experimental-string-processing": "dev/6"
127127
}
128128
},
129129
"rebranch": {
@@ -157,7 +157,7 @@
157157
"sourcekit-lsp": "main",
158158
"swift-format": "main",
159159
"swift-installer-scripts": "main",
160-
"swift-experimental-string-processing": "dev/5"
160+
"swift-experimental-string-processing": "dev/6"
161161
}
162162
},
163163
"release/5.6": {
@@ -308,7 +308,7 @@
308308
"sourcekit-lsp": "main",
309309
"swift-format": "main",
310310
"swift-installer-scripts": "main",
311-
"swift-experimental-string-processing": "dev/5"
311+
"swift-experimental-string-processing": "dev/6"
312312
}
313313
},
314314
"release/5.4": {

0 commit comments

Comments
 (0)