Skip to content

Commit 3efcebf

Browse files
authored
Merge pull request #41325 from apple/revert-41275-regex-update-dev-6
Revert "[Regex] Switch regex match to Swift tuples."
2 parents 30e2945 + dcd9e8e commit 3efcebf

File tree

12 files changed

+94
-41
lines changed

12 files changed

+94
-41
lines changed

include/swift/AST/ASTContext.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,12 @@ class ASTContext final {
367367
/// i.e. true if the entry is [key: alias_name, value: (real_name, true)].
368368
mutable llvm::DenseMap<Identifier, std::pair<Identifier, bool>> ModuleAliasMap;
369369

370+
/// The maximum arity of `_StringProcessing.Tuple{n}`.
371+
static constexpr unsigned StringProcessingTupleDeclMaxArity = 8;
372+
/// Cached `_StringProcessing.Tuple{n}` declarations.
373+
mutable SmallVector<StructDecl *, StringProcessingTupleDeclMaxArity - 2>
374+
StringProcessingTupleDecls;
375+
370376
/// Retrieve the allocator for the given arena.
371377
llvm::BumpPtrAllocator &
372378
getAllocator(AllocationArena arena = AllocationArena::Permanent) const;
@@ -623,6 +629,14 @@ class ASTContext final {
623629
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
624630
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
625631

632+
/// Retrieve the max arity that `_StringProcessing.Tuple{arity}` was
633+
/// instantiated for.
634+
unsigned getStringProcessingTupleDeclMaxArity() const;
635+
636+
/// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given
637+
/// arity.
638+
StructDecl *getStringProcessingTupleDecl(unsigned arity) const;
639+
626640
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
627641
FuncDecl *getLessThanIntDecl() const;
628642

include/swift/AST/DiagnosticsSema.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4832,6 +4832,9 @@ ERROR(string_processing_lib_missing,none,
48324832
ERROR(regex_capture_types_failed_to_decode,none,
48334833
"failed to decode capture types for regular expression literal; this may "
48344834
"be a compiler bug", ())
4835+
ERROR(regex_too_many_captures,none,
4836+
"too many captures in regular expression literal; the current limit is "
4837+
"%0", (unsigned))
48354838

48364839
//------------------------------------------------------------------------------
48374840
// MARK: Type Check Types

include/swift/AST/KnownSDKTypes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,6 @@ KNOWN_SDK_TYPE_DECL(Distributed, RemoteCallTarget, StructDecl, 0)
4949

5050
// String processing
5151
KNOWN_SDK_TYPE_DECL(StringProcessing, Regex, StructDecl, 1)
52+
KNOWN_SDK_TYPE_DECL(StringProcessing, DynamicCaptures, EnumDecl, 0)
5253

5354
#undef KNOWN_SDK_TYPE_DECL

lib/AST/ASTContext.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,29 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
12251225
return ConcreteDeclRef(foundDecl, subs);
12261226
}
12271227

1228+
unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const {
1229+
return StringProcessingTupleDeclMaxArity;
1230+
}
1231+
1232+
/// Look up (and cache) the `_StringProcessing.Tuple{arity}` struct.
///
/// \param arity The number of generic parameters of the requested tuple
///        struct. Supported values are 2 through
///        `StringProcessingTupleDeclMaxArity`, inclusive.
/// \returns The matching declaration, or \c nullptr when \p arity is outside
///          the supported range or the `_StringProcessing` module has not
///          been loaded.
StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const {
  assert(arity >= 2 && "Tuple{n} declarations start at arity 2");
  // Reject out-of-range arities explicitly. Without the lower-bound check a
  // no-assert build would compute `arity - 2` on an unsigned value, wrap
  // around, and index far outside the cache.
  if (arity < 2 || arity > StringProcessingTupleDeclMaxArity)
    return nullptr;

  // The cache holds one slot per supported arity (2...max) and is populated
  // lazily on first use.
  if (StringProcessingTupleDecls.empty())
    StringProcessingTupleDecls.append(
        StringProcessingTupleDeclMaxArity - 1, nullptr);

  auto &decl = StringProcessingTupleDecls[arity - 2];
  if (decl)
    return decl;

  // `getLoadedModule` yields null when the module was never loaded; treat
  // that the same way as an unsupported arity instead of dereferencing it.
  auto *spModule = getLoadedModule(Id_StringProcessing);
  if (!spModule)
    return nullptr;

  SmallVector<ValueDecl *, 1> results;
  auto typeName = getIdentifier("Tuple" + llvm::utostr(arity));
  spModule->lookupQualified(
      spModule, DeclNameRef(typeName), NL_OnlyTypes, results);
  assert(results.size() == 1);
  // Store the result in the cache slot and return it.
  return (decl = cast<StructDecl>(results[0]));
}
1250+
12281251
static
12291252
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
12301253
FuncDecl *&cached) {

lib/Sema/CSGen.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,20 +1267,27 @@ namespace {
12671267
ctx.Id_Regex.str());
12681268
return Type();
12691269
}
1270-
SmallVector<TupleTypeElt, 4> matchElements {ctx.getSubstringType()};
1270+
SmallVector<Type, 4> matchTypes {ctx.getSubstringType()};
12711271
if (decodeRegexCaptureTypes(ctx,
12721272
E->getSerializedCaptureStructure(),
12731273
/*atomType*/ ctx.getSubstringType(),
1274-
matchElements)) {
1274+
matchTypes)) {
12751275
ctx.Diags.diagnose(E->getLoc(),
12761276
diag::regex_capture_types_failed_to_decode);
12771277
return Type();
12781278
}
1279-
if (matchElements.size() == 1)
1279+
if (matchTypes.size() == 1)
12801280
return BoundGenericStructType::get(
1281-
regexDecl, Type(), matchElements.front().getType());
1282-
// Form a tuple.
1283-
auto matchType = TupleType::get(matchElements, ctx);
1281+
regexDecl, Type(), matchTypes.front());
1282+
// Form a `_StringProcessing.Tuple{n}<...>`.
1283+
auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size());
1284+
if (!tupleDecl) {
1285+
ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures,
1286+
ctx.getStringProcessingTupleDeclMaxArity() - 1);
1287+
return Type();
1288+
}
1289+
auto matchType = BoundGenericStructType::get(
1290+
tupleDecl, Type(), matchTypes);
12841291
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
12851292
}
12861293

lib/Sema/TypeCheckRegex.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ using namespace swift;
3333
bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
3434
ArrayRef<uint8_t> serialization,
3535
Type atomType,
36-
SmallVectorImpl<TupleTypeElt> &result) {
36+
SmallVectorImpl<Type> &result) {
3737
using Version = RegexLiteralExpr::CaptureStructureSerializationVersion;
3838
static const Version implVersion = 1;
3939
unsigned size = serialization.size();
@@ -46,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
4646
if (version != implVersion)
4747
return true;
4848
// Read contents.
49-
SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
49+
SmallVector<SmallVector<Type, 4>, 4> scopes(1);
5050
unsigned offset = sizeof(Version);
5151
auto consumeCode = [&]() -> Optional<RegexCaptureStructureCode> {
5252
auto rawValue = serialization[offset];
@@ -73,29 +73,33 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
7373
if (length >= size - offset)
7474
return true; // Unterminated string.
7575
StringRef name(namePtr, length);
76-
scopes.back().push_back(
77-
TupleTypeElt(atomType, ctx.getIdentifier(name)));
76+
// The name is currently unused because we are forming a nominal
77+
// `Tuple{n}` type. We will switch back to native tuples when there are
78+
// variadic generics.
79+
(void)name;
80+
scopes.back().push_back(atomType);
7881
offset += length + /*NUL*/ 1;
7982
break;
8083
}
8184
case RegexCaptureStructureCode::FormArray: {
82-
auto &element = scopes.back().back();
83-
element = TupleTypeElt(ArraySliceType::get(element.getType()),
84-
element.getName());
85+
auto &type = scopes.back().back();
86+
type = ArraySliceType::get(type);
8587
break;
8688
}
8789
case RegexCaptureStructureCode::FormOptional: {
88-
auto &element = scopes.back().back();
89-
element = TupleTypeElt(OptionalType::get(element.getType()),
90-
element.getName());
90+
auto &type = scopes.back().back();
91+
type = OptionalType::get(type);
9192
break;
9293
}
9394
case RegexCaptureStructureCode::BeginTuple:
9495
scopes.push_back({});
9596
break;
9697
case RegexCaptureStructureCode::EndTuple: {
9798
auto children = scopes.pop_back_val();
98-
auto type = TupleType::get(children, ctx);
99+
if (children.size() > ctx.getStringProcessingTupleDeclMaxArity())
100+
return true;
101+
auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size());
102+
auto type = BoundGenericStructType::get(tupleDecl, Type(), children);
99103
scopes.back().push_back(type);
100104
break;
101105
}

lib/Sema/TypeCheckRegex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t {
4040
bool decodeRegexCaptureTypes(ASTContext &ctx,
4141
llvm::ArrayRef<uint8_t> serialization,
4242
Type atomType,
43-
llvm::SmallVectorImpl<TupleTypeElt> &result);
43+
llvm::SmallVectorImpl<Type> &result);
4444

4545
} // end namespace swift
4646

test/StringProcessing/Parse/regex.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
_ = '/abc/'
55

66
_ = ('/[*/', '/+]/', '/.]/')
7-
// expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}}
8-
// expected-error@-2 {{cannot parse regular expression: expected ']'}}
7+
// expected-error@-1 {{cannot parse regular expression}}
98

109
_ = '/\w+/'
1110
_ = '/\'\\/'

test/StringProcessing/Runtime/regex_basic.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ RegexBasicTests.test("Captures") {
4747
"""
4848
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
4949
// Test inferred type.
50-
let _: Regex<(Substring, Substring, Substring?, Substring)>.Type
50+
let _: Regex<Tuple4<Substring, Substring, Substring?, Substring>>.Type
5151
= type(of: regex)
5252
let match1 = input.expectMatch(regex)
5353
expectEqual(input[...], input[match1.range])

test/StringProcessing/SILGen/regex_literal_silgen.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ var s = '/abc/'
1010
// CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int
1111
// CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]]
1212

13-
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
14-
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
13+
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
14+
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>

test/StringProcessing/Sema/regex_literal_type_inference.swift

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,39 +10,41 @@ takesRegex('//') // okay
1010
let r1 = '/.(.)/'
1111
// Note: We test its type with a separate statement so that we know the type
1212
// checker inferred the regex's type independently without contextual types.
13-
let _: Regex<(Substring, Substring)>.Type = type(of: r1)
13+
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r1)
1414

15-
struct S {}
16-
// expected-error @+2 {{cannot assign value of type 'Regex<(Substring, Substring)>' to type 'Regex<S>'}}
17-
// expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, Substring)' and 'S') are expected to be equal}}
15+
struct S: MatchProtocol {
16+
typealias Capture = Substring
17+
}
18+
// expected-error @+2 {{cannot assign value of type 'Regex<Tuple2<Substring, Substring>>' to type 'Regex<S>'}}
19+
// expected-note @+1 {{arguments to generic parameter 'Match' ('Tuple2<Substring, Substring>' and 'S') are expected to be equal}}
1820
let r2: Regex<S> = '/.(.)/'
1921

2022
let r3 = '/(.)(.)/'
21-
let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r3)
23+
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r3)
2224

2325
let r4 = '/(?<label>.)(.)/'
24-
let _: Regex<(Substring, label: Substring, Substring)>.Type = type(of: r4)
26+
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r4)
2527

2628
let r5 = '/(.(.(.)))/'
27-
let _: Regex<(Substring, Substring, Substring, Substring)>.Type = type(of: r5)
29+
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r5)
2830

29-
let r6 = '/(?'we'.(?'are'.(?'regex'.)+)?)/'
30-
let _: Regex<(Substring, we: Substring, are: Substring?, regex: [Substring]?)>.Type = type(of: r6)
31+
let r6 = '/(?'we'.(?'are'.(?'regex'.)))/'
32+
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r6)
3133

3234
let r7 = '/(?:(?:(.(.(.)*)?))*?)?/'
3335
// ^ 1
3436
// ^ 2
3537
// ^ 3
36-
let _: Regex<(Substring, [Substring]?, [Substring?]?, [[Substring]?]?)>.Type = type(of: r7)
38+
let _: Regex<Tuple4<Substring, [Substring]?, [Substring?]?, [[Substring]?]?>>.Type = type(of: r7)
3739

3840
let r8 = '/well(?<theres_no_single_element_tuple_what_can_we>do)/'
39-
let _: Regex<(Substring, theres_no_single_element_tuple_what_can_we: Substring)>.Type = type(of: r8)
41+
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r8)
4042

4143
let r9 = '/(a)|(b)|(c)|d/'
42-
let _: Regex<(Substring, Substring?, Substring?, Substring?)>.Type = type(of: r9)
44+
let _: Regex<Tuple4<Substring, Substring?, Substring?, Substring?>>.Type = type(of: r9)
4345

4446
let r10 = '/(a)|b/'
45-
let _: Regex<(Substring, Substring?)>.Type = type(of: r10)
47+
let _: Regex<Tuple2<Substring, Substring?>>.Type = type(of: r10)
4648

47-
let r11 = '/()()()()()()()()/'
48-
let _: Regex<(Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring)>.Type = type(of: r11)
49+
// expected-error @+1 {{too many captures in regular expression literal; the current limit is 7}}
50+
let r11 = '/()()()()()()()()/' // 8 captures, too many for our prototype

utils/update_checkout/update-checkout-config.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@
123123
"swift-cmark-gfm": "gfm",
124124
"swift-nio": "2.31.2",
125125
"swift-nio-ssl": "2.15.0",
126-
"swift-experimental-string-processing": "dev/6"
126+
"swift-experimental-string-processing": "dev/5"
127127
}
128128
},
129129
"rebranch": {
@@ -157,7 +157,7 @@
157157
"sourcekit-lsp": "main",
158158
"swift-format": "main",
159159
"swift-installer-scripts": "main",
160-
"swift-experimental-string-processing": "dev/6"
160+
"swift-experimental-string-processing": "dev/5"
161161
}
162162
},
163163
"release/5.6": {
@@ -308,7 +308,7 @@
308308
"sourcekit-lsp": "main",
309309
"swift-format": "main",
310310
"swift-installer-scripts": "main",
311-
"swift-experimental-string-processing": "dev/6"
311+
"swift-experimental-string-processing": "dev/5"
312312
}
313313
},
314314
"release/5.4": {

0 commit comments

Comments
 (0)