Skip to content

[Regex] Infer 'Match' type of regex literals. #40717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion include/swift/AST/ASTContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,12 @@ class ASTContext final {
/// i.e. true if the entry is [key: alias_name, value: (real_name, true)].
mutable llvm::DenseMap<Identifier, std::pair<Identifier, bool>> ModuleAliasMap;

/// The maximum arity of `_StringProcessing.Tuple{n}`.
static constexpr unsigned StringProcessingTupleDeclMaxArity = 8;
/// Cached `_StringProcessing.Tuple{n}` declarations, indexed by `n - 2`.
///
/// Arities 2 through `StringProcessingTupleDeclMaxArity` are cached, which is
/// `StringProcessingTupleDeclMaxArity - 1` entries; the inline capacity is
/// sized to hold all of them so that filling the cache never spills to the
/// heap.
mutable SmallVector<StructDecl *, StringProcessingTupleDeclMaxArity - 1>
    StringProcessingTupleDecls;

/// Retrieve the allocator for the given arena.
llvm::BumpPtrAllocator &
getAllocator(AllocationArena arena = AllocationArena::Permanent) const;
Expand Down Expand Up @@ -623,7 +629,15 @@ class ASTContext final {

/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
ConcreteDeclRef getRegexInitDecl(Type regexType) const;


/// Retrieve the maximum arity `n` for which a `_StringProcessing.Tuple{n}`
/// declaration can be requested from `getStringProcessingTupleDecl`; larger
/// arities yield null there.
unsigned getStringProcessingTupleDeclMaxArity() const;

/// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given
/// arity.
///
/// \p arity is asserted to be at least 2. Returns null when \p arity exceeds
/// `getStringProcessingTupleDeclMaxArity()`, so callers must check the result.
StructDecl *getStringProcessingTupleDecl(unsigned arity) const;

/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
FuncDecl *getLessThanIntDecl() const;

Expand Down
3 changes: 3 additions & 0 deletions include/swift/AST/DiagnosticsSema.def
Original file line number Diff line number Diff line change
Expand Up @@ -4772,6 +4772,9 @@ ERROR(string_processing_lib_missing,none,
ERROR(regex_capture_types_failed_to_decode,none,
"failed to decode capture types for regular expression literal; this may "
"be a compiler bug", ())
ERROR(regex_too_many_captures,none,
"too many captures in regular expression literal; the current limit is "
"%0", (unsigned))

//------------------------------------------------------------------------------
// MARK: Type Check Types
Expand Down
25 changes: 24 additions & 1 deletion lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1234,7 +1234,30 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
return ConcreteDeclRef(foundDecl, subs);
}

/// Returns the largest arity `n` for which `_StringProcessing.Tuple{n}` is
/// supported, i.e. the value of `StringProcessingTupleDeclMaxArity`.
unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const {
  return StringProcessingTupleDeclMaxArity;
}

/// Retrieve the `_StringProcessing.Tuple{arity}` struct declaration, looking
/// it up in the loaded `_StringProcessing` module on first use and caching the
/// result for subsequent calls.
///
/// \param arity The number of tuple elements; expected to be at least 2.
/// \returns The declaration, or null if \p arity is outside the supported
/// range [2, StringProcessingTupleDeclMaxArity], if no `_StringProcessing`
/// module is loaded, or if the lookup does not produce exactly one struct.
StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const {
  assert(arity >= 2 && "Tuple{n} declarations only exist for n >= 2");
  // Enforce the lower bound even when assertions are compiled out: the
  // `arity - 2` index below would otherwise wrap around and read out of
  // bounds.
  if (arity < 2 || arity > StringProcessingTupleDeclMaxArity)
    return nullptr;

  // Lazily create one (initially empty) cache slot per supported arity,
  // i.e. for 2 ... StringProcessingTupleDeclMaxArity.
  if (StringProcessingTupleDecls.empty())
    StringProcessingTupleDecls.append(
        StringProcessingTupleDeclMaxArity - 1, nullptr);

  auto &cached = StringProcessingTupleDecls[arity - 2];
  if (cached)
    return cached;

  // Nothing to look up in if the module has not been loaded.
  auto *spModule = getLoadedModule(Id_StringProcessing);
  if (!spModule)
    return nullptr;

  SmallVector<ValueDecl *, 1> results;
  auto typeName = getIdentifier("Tuple" + llvm::utostr(arity));
  spModule->lookupQualified(
      spModule, DeclNameRef(typeName), NL_OnlyTypes, results);
  assert(results.size() == 1 &&
         "expected exactly one _StringProcessing.Tuple{n} declaration");
  // Do not touch `results` unless the lookup produced exactly one entry; a
  // failed lookup is reported as null and is not cached.
  if (results.size() != 1)
    return nullptr;

  cached = dyn_cast<StructDecl>(results.front());
  return cached;
}

static
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
FuncDecl *&cached) {
if (cached)
Expand Down
21 changes: 15 additions & 6 deletions lib/Sema/CSGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1267,19 +1267,28 @@ namespace {
ctx.Id_Regex.str());
return Type();
}
SmallVector<TupleTypeElt, 4> captureTypes;
SmallVector<Type, 4> matchTypes {ctx.getSubstringType()};
if (decodeRegexCaptureTypes(ctx,
E->getSerializedCaptureStructure(),
/*atomType*/ ctx.getSubstringType(),
captureTypes)) {
matchTypes)) {
ctx.Diags.diagnose(E->getLoc(),
diag::regex_capture_types_failed_to_decode);
return Type();
}
auto genericArg = captureTypes.size() == 1
? captureTypes[0].getRawType()
: TupleType::get(captureTypes, ctx);
return BoundGenericStructType::get(regexDecl, Type(), {genericArg});
if (matchTypes.size() == 1)
return BoundGenericStructType::get(
regexDecl, Type(), matchTypes.front());
// Form a `_StringProcessing.Tuple{n}<...>`.
auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size());
if (!tupleDecl) {
ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures,
ctx.getStringProcessingTupleDeclMaxArity() - 1);
return Type();
}
auto matchType = BoundGenericStructType::get(
tupleDecl, Type(), matchTypes);
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
}

Type visitDeclRefExpr(DeclRefExpr *E) {
Expand Down
21 changes: 15 additions & 6 deletions lib/Sema/TypeCheckRegex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ using namespace swift;
// 〚`name: T` (atom)〛 ==> .atom, `name`, '\0'
// 〚`[T]`〛 ==> 〚`T`〛, .formArray
// 〚`T?`〛 ==> 〚`T`〛, .formOptional
// 〚`(T0, T1, ...)` (top level)〛 ==> 〚`T0`〛, 〚`T1`〛, ...
// 〚`(T0, T1, ...)`〛 ==> .beginTuple, 〚`T0`〛, 〚`T1`〛, ..., .endTuple
//
// For details, see apple/swift-experimental-string-processing.
bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
ArrayRef<uint8_t> serialization,
Type atomType,
SmallVectorImpl<TupleTypeElt> &result) {
SmallVectorImpl<Type> &result) {
using Version = RegexLiteralExpr::CaptureStructureSerializationVersion;
static const Version implVersion = 1;
unsigned size = serialization.size();
Expand All @@ -45,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
if (version != implVersion)
return true;
// Read contents.
SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
SmallVector<SmallVector<Type, 4>, 4> scopes(1);
unsigned offset = sizeof(Version);
auto consumeCode = [&]() -> Optional<RegexCaptureStructureCode> {
auto rawValue = serialization[offset];
Expand All @@ -72,26 +73,34 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
if (length >= size - offset)
return true; // Unterminated string.
StringRef name(namePtr, length);
scopes.back().push_back(TupleTypeElt(atomType, ctx.getIdentifier(name)));
// The name is currently unused because we are forming a nominal
// `Tuple{n}` type. We will switch back to native tuples when variadic
// generics are available.
(void)name;
scopes.back().push_back(atomType);
offset += length + /*NUL*/ 1;
break;
}
case RegexCaptureStructureCode::FormArray: {
auto &type = scopes.back().back();
type = TupleTypeElt(ArraySliceType::get(type.getRawType()));
type = ArraySliceType::get(type);
break;
}
case RegexCaptureStructureCode::FormOptional: {
auto &type = scopes.back().back();
type = TupleTypeElt(OptionalType::get(type.getRawType()));
type = OptionalType::get(type);
break;
}
case RegexCaptureStructureCode::BeginTuple:
scopes.push_back({});
break;
case RegexCaptureStructureCode::EndTuple: {
auto children = scopes.pop_back_val();
scopes.back().push_back(TupleType::get(children, ctx));
if (children.size() > ctx.getStringProcessingTupleDeclMaxArity())
return true;
auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size());
auto type = BoundGenericStructType::get(tupleDecl, Type(), children);
scopes.back().push_back(type);
break;
}
case RegexCaptureStructureCode::CaseCount:
Expand Down
2 changes: 1 addition & 1 deletion lib/Sema/TypeCheckRegex.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t {
bool decodeRegexCaptureTypes(ASTContext &ctx,
llvm::ArrayRef<uint8_t> serialization,
Type atomType,
llvm::SmallVectorImpl<TupleTypeElt> &result);
llvm::SmallVectorImpl<Type> &result);

} // end namespace swift

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing
// REQUIRES: libswift
// REQUIRES: swift_in_compiler

// Note there is purposefully no trailing newline here.
// expected-error@+1 {{unterminated regex literal}}
Expand Down
18 changes: 10 additions & 8 deletions test/StringProcessing/Runtime/regex_basic.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: %target-run-simple-swift(-Xfrontend -enable-experimental-string-processing)

// REQUIRES: libswift,string_processing,executable_test
// REQUIRES: swift_in_compiler,string_processing,executable_test

import StdlibUnittest

Expand All @@ -25,19 +25,19 @@ RegexBasicTests.test("Basic") {

let match1 = input.expectMatch('/aabcc./')
expectEqual("aabccd", input[match1.range])
expectTrue(() == match1.captures)
expectTrue("aabccd" == match1.match)

let match2 = input.expectMatch('/a*b.+./')
expectEqual("aabccd", input[match2.range])
expectTrue(() == match2.captures)
expectTrue("aabccd" == match2.match)
}

RegexBasicTests.test("Modern") {
let input = "aabccd"

let match1 = input.expectMatch('|a a bc c /*hello*/ .|')
expectEqual("aabccd", input[match1.range])
expectTrue(() == match1.captures)
expectTrue("aabccd" == match1.match)
}

RegexBasicTests.test("Captures") {
Expand All @@ -47,12 +47,14 @@ RegexBasicTests.test("Captures") {
"""
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
// Test inferred type.
let _: Regex<(Substring, Substring?, Substring)>.Type = type(of: regex)
let _: Regex<Tuple4<Substring, Substring, Substring?, Substring>>.Type
= type(of: regex)
let match1 = input.expectMatch(regex)
expectEqual(input[...], input[match1.range])
expectTrue("A6F0" == match1.captures.0)
expectTrue("A6F1" == match1.captures.1)
expectTrue("Extend" == match1.captures.2)
expectTrue(input == match1.0)
expectTrue("A6F0" == match1.1)
expectTrue("A6F1" == match1.2)
expectTrue("Extend" == match1.3)
}

runAllTests()
4 changes: 2 additions & 2 deletions test/StringProcessing/SILGen/regex_literal_silgen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ var s = '/abc/'
// CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int
// CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]]

// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
31 changes: 18 additions & 13 deletions test/StringProcessing/Sema/regex_literal_type_inference.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,49 @@
// REQUIRES: swift_in_compiler

let r0 = '/./'
let _: Regex<()> = r0
let _: Regex<Substring> = r0

func takesRegex<Match>(_: Regex<Match>) {}
takesRegex('//') // okay

let r1 = '/.(.)/'
// Note: We test its type with a separate statement so that we know the type
// checker inferred the regex's type independently without contextual types.
let _: Regex<Substring>.Type = type(of: r1)
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r1)

struct S {}
// expected-error @+2 {{cannot assign value of type 'Regex<Substring>' to type 'Regex<S>'}}
// expected-note @+1 {{arguments to generic parameter 'Capture' ('Substring' and 'S') are expected to be equal}}
struct S: MatchProtocol {
typealias Capture = Substring
}
// expected-error @+2 {{cannot assign value of type 'Regex<Tuple2<Substring, Substring>>' to type 'Regex<S>'}}
// expected-note @+1 {{arguments to generic parameter 'Match' ('Tuple2<Substring, Substring>' and 'S') are expected to be equal}}
let r2: Regex<S> = '/.(.)/'

let r3 = '/(.)(.)/'
let _: Regex<(Substring, Substring)>.Type = type(of: r3)
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r3)

let r4 = '/(?<label>.)(.)/'
let _: Regex<(label: Substring, Substring)>.Type = type(of: r4)
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r4)

let r5 = '/(.(.(.)))/'
let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r5)
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r5)

let r6 = '/(?'we'.(?'are'.(?'regex'.)))/'
let _: Regex<(we: Substring, are: Substring, regex: Substring)>.Type = type(of: r6)
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r6)

let r7 = '/(?:(?:(.(.(.)*)?))*?)?/'
// ^ 1
// ^ 2
// ^ 3
let _: Regex<([Substring]?, [Substring?]?, [[Substring]?]?)>.Type = type(of: r7)
let _: Regex<Tuple4<Substring, [Substring]?, [Substring?]?, [[Substring]?]?>>.Type = type(of: r7)

let r8 = '/well(?<theres_no_single_element_tuple_what_can_we>do)/'
let _: Regex<Substring>.Type = type(of: r8)
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r8)

let r9 = '/(a)|(b)|(c)|d/'
let _: Regex<(Substring?, Substring?, Substring?)>.Type = type(of: r9)
let _: Regex<Tuple4<Substring, Substring?, Substring?, Substring?>>.Type = type(of: r9)

let r10 = '/(a)|b/'
let _: Regex<Substring?>.Type = type(of: r10)
let _: Regex<Tuple2<Substring, Substring?>>.Type = type(of: r10)

// expected-error @+1 {{too many captures in regular expression literal; the current limit is 7}}
let r11 = '/()()()()()()()()/' // 8 captures, too many for our prototype
6 changes: 3 additions & 3 deletions utils/update_checkout/update-checkout-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@
"swift-cmark-gfm": "gfm",
"swift-nio": "2.31.2",
"swift-nio-ssl": "2.15.0",
"swift-experimental-string-processing": "dev/4"
"swift-experimental-string-processing": "dev/5"
}
},
"rebranch": {
Expand Down Expand Up @@ -157,7 +157,7 @@
"sourcekit-lsp": "main",
"swift-format": "main",
"swift-installer-scripts": "main",
"swift-experimental-string-processing": "dev/4"
"swift-experimental-string-processing": "dev/5"
}
},
"release/5.6": {
Expand Down Expand Up @@ -308,7 +308,7 @@
"sourcekit-lsp": "main",
"swift-format": "main",
"swift-installer-scripts": "main",
"swift-experimental-string-processing": "dev/4"
"swift-experimental-string-processing": "dev/5"
}
},
"release/5.4": {
Expand Down