Skip to content

Commit 05363cd

Browse files
committed
Regex literal runtime plumbing.
- Frontend: Implicitly import `_StringProcessing` when frontend flag `-enable-experimental-string-processing` is set. - Type checker: Set a regex literal expression's type as `_StringProcessing.Regex<(Substring, DynamicCaptures)>`. `(Substring, DynamicCaptures)` is a temporary `Match` type that will help get us to an end-to-end working system. This will be replaced by actual type inference based on a regex's pattern in a follow-up patch (soon). - SILGen: Lower a regex literal expression to a call to `_StringProcessing.Regex.init(_regexString:)`. - String processing runtime: Add `Regex`, `DynamicCaptures` (matching actual APIs in apple/swift-experimental-string-processing), and `Regex(_regexString:)`. Upcoming: - Build `_MatchingEngine` and `_StringProcessing` modules with sources from apple/swift-experimental-string-processing. - Replace `DynamicCaptures` with inferred capture types.
1 parent 2d12fab commit 05363cd

25 files changed

+164
-102
lines changed

include/swift/AST/ASTContext.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,9 @@ class ASTContext final {
620620
ConcreteDeclRef getBuiltinInitDecl(NominalTypeDecl *decl,
621621
KnownProtocolKind builtinProtocol,
622622
llvm::function_ref<DeclName (ASTContext &ctx)> initName) const;
623+
624+
/// Retrieve _StringProcessing.Regex.init(_regexString: String).
625+
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
623626

624627
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
625628
FuncDecl *getLessThanIntDecl() const;

include/swift/AST/DiagnosticsFrontend.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ WARNING(emit_reference_dependencies_without_primary_file,none,
145145

146146
WARNING(warn_implicit_concurrency_import_failed,none,
147147
"unable to perform implicit import of \"_Concurrency\" module: no such module found", ())
148+
WARNING(warn_implicit_string_processing_import_failed,none,
149+
"unable to perform implicit import of \"_StringProcessing\" module: no such module found", ())
148150

149151
ERROR(error_module_name_required,none, "-module-name is required", ())
150152
ERROR(error_bad_module_name,none,

include/swift/AST/DiagnosticsSema.def

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3668,9 +3668,6 @@ ERROR(builtin_string_literal_broken_proto,none,
36683668
ERROR(string_literal_broken_proto,none,
36693669
"protocol 'ExpressibleByStringLiteral' is broken", ())
36703670

3671-
ERROR(regex_decl_broken,none,
3672-
"cannot find 'Regex' type in scope", ())
3673-
36743671
// Array literals
36753672
ERROR(should_use_dictionary_literal,none,
36763673
"dictionary of type %0 cannot be %select{used|initialized}1 "
@@ -4722,6 +4719,14 @@ ERROR(async_unavailable_decl,none,
47224719
"%0 %1 is unavailable from asynchronous contexts%select{|; %3}2",
47234720
(DescriptiveDeclKind, DeclBaseName, bool, StringRef))
47244721

4722+
//------------------------------------------------------------------------------
4723+
// MARK: String Processing
4724+
//------------------------------------------------------------------------------
4725+
4726+
ERROR(string_processing_lib_missing,none,
4727+
"missing '%0' declaration, probably because the '_StringProcessing' "
4728+
"module was not imported properly", (StringRef))
4729+
47254730
//------------------------------------------------------------------------------
47264731
// MARK: Type Check Types
47274732
//------------------------------------------------------------------------------

include/swift/AST/Expr.h

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -966,26 +966,18 @@ class InterpolatedStringLiteralExpr : public LiteralExpr {
966966
class RegexLiteralExpr : public LiteralExpr {
967967
SourceLoc Loc;
968968
StringRef RegexText;
969-
Expr *SemanticExpr;
970969

971-
RegexLiteralExpr(SourceLoc loc, StringRef regexText, Expr *semanticExpr,
972-
bool isImplicit)
970+
RegexLiteralExpr(SourceLoc loc, StringRef regexText, bool isImplicit)
973971
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Loc(loc),
974-
RegexText(regexText), SemanticExpr(semanticExpr) {}
972+
RegexText(regexText) {}
975973

976974
public:
977975
static RegexLiteralExpr *createParsed(ASTContext &ctx, SourceLoc loc,
978-
StringRef regexText,
979-
Expr *semanticExpr);
976+
StringRef regexText);
980977

981978
/// Retrieve the raw regex text.
982979
StringRef getRegexText() const { return RegexText; }
983980

984-
/// Retrieve the semantic expression that the regex will be type-checked and
985-
/// emitted as.
986-
Expr *getSemanticExpr() const { return SemanticExpr; }
987-
void setSemanticExpr(Expr *expr) { SemanticExpr = expr; }
988-
989981
SourceRange getSourceRange() const { return Loc; }
990982

991983
static bool classof(const Expr *E) {

include/swift/AST/KnownIdentifiers.def

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,10 @@ IDENTIFIER(pullback)
250250
IDENTIFIER(TangentVector)
251251
IDENTIFIER(zero)
252252

253-
// Regex literals
253+
// String processing
254254
IDENTIFIER(Regex)
255-
IDENTIFIER(_regexString)
255+
IDENTIFIER_(regexString)
256+
IDENTIFIER_(StringProcessing)
256257

257258
// Distributed actors
258259
IDENTIFIER(transport)

include/swift/AST/KnownSDKTypes.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,8 @@ KNOWN_SDK_TYPE_DECL(Distributed, DistributedActor, ProtocolDecl, 0)
4848
KNOWN_SDK_TYPE_DECL(Distributed, ActorIdentity, ProtocolDecl, 0)
4949
KNOWN_SDK_TYPE_DECL(Distributed, AnyActorIdentity, StructDecl, 0)
5050

51+
// String processing
52+
KNOWN_SDK_TYPE_DECL(StringProcessing, Regex, StructDecl, 1)
53+
KNOWN_SDK_TYPE_DECL(StringProcessing, DynamicCaptures, EnumDecl, 0)
54+
5155
#undef KNOWN_SDK_TYPE_DECL

include/swift/Frontend/Frontend.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,10 @@ class CompilerInvocation {
354354
/// imported.
355355
bool shouldImportSwiftConcurrency() const;
356356

357+
/// Whether the Swift String Processing support library should be implicitly
358+
/// imported.
359+
bool shouldImportSwiftStringProcessing() const;
360+
357361
/// Performs input setup common to these tools:
358362
/// sil-opt, sil-func-extractor, sil-llvm-gen, and sil-nm.
359363
/// Return value includes the buffer so caller can keep it alive.
@@ -533,6 +537,14 @@ class CompilerInstance {
533537
/// i.e. if it can be found.
534538
bool canImportSwiftConcurrency() const;
535539

540+
/// Verify that if an implicit import of the `StringProcessing` module is
541+
/// expected, it can actually be imported. Emit a warning, otherwise.
542+
void verifyImplicitStringProcessingImport();
543+
544+
/// Whether the Swift String Processing support library can be imported
545+
/// i.e. if it can be found.
546+
bool canImportSwiftStringProcessing() const;
547+
536548
/// Gets the SourceFile which is the primary input for this CompilerInstance.
537549
/// \returns the primary SourceFile, or nullptr if there is no primary input;
538550
/// if there are _multiple_ primary inputs, fails with an assertion.

include/swift/Strings.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ constexpr static const StringLiteral SWIFT_ONONE_SUPPORT = "SwiftOnoneSupport";
2626
constexpr static const StringLiteral SWIFT_CONCURRENCY_NAME = "_Concurrency";
2727
/// The name of the Distributed module, which supports that extension.
2828
constexpr static const StringLiteral SWIFT_DISTRIBUTED_NAME = "_Distributed";
29+
/// The name of the StringProcessing module, which supports that extension.
30+
constexpr static const StringLiteral SWIFT_STRING_PROCESSING_NAME = "_StringProcessing";
2931
/// The name of the SwiftShims module, which contains private stdlib decls.
3032
constexpr static const StringLiteral SWIFT_SHIMS_NAME = "SwiftShims";
3133
/// The name of the Builtin module, which contains Builtin functions.

lib/AST/ASTContext.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1216,8 +1216,23 @@ ASTContext::getBuiltinInitDecl(NominalTypeDecl *decl,
12161216
return witness;
12171217
}
12181218

1219+
ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
1220+
auto *spModule = getLoadedModule(Id_StringProcessing);
1221+
DeclName name(*const_cast<ASTContext *>(this),
1222+
DeclBaseName::createConstructor(),
1223+
{Id_regexString});
1224+
SmallVector<ValueDecl *, 1> results;
1225+
spModule->lookupQualified(getRegexType(), DeclNameRef(name),
1226+
NL_IncludeUsableFromInline, results);
1227+
assert(results.size() == 1);
1228+
auto *foundDecl = cast<ConstructorDecl>(results[0]);
1229+
auto subs = regexType->getMemberSubstitutionMap(spModule, foundDecl);
1230+
return ConcreteDeclRef(foundDecl, subs);
1231+
}
1232+
12191233
static
1220-
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op, FuncDecl *&cached) {
1234+
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
1235+
FuncDecl *&cached) {
12211236
if (cached)
12221237
return cached;
12231238

lib/AST/ASTDumper.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1909,7 +1909,10 @@ class PrintExpr : public ExprVisitor<PrintExpr> {
19091909
}
19101910
void visitRegexLiteralExpr(RegexLiteralExpr *E) {
19111911
printCommon(E, "regex_literal_expr");
1912-
printRec(E->getSemanticExpr());
1912+
PrintWithColorRAII(OS, LiteralValueColor)
1913+
<< " text=" << QuotedString(E->getRegexText())
1914+
<< " initializer=";
1915+
E->getInitializer().dump(PrintWithColorRAII(OS, LiteralValueColor).getOS());
19131916
PrintWithColorRAII(OS, ParenthesisColor) << ')';
19141917
}
19151918

lib/AST/ASTWalker.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,11 +1153,6 @@ class Traversal : public ASTVisitor<Traversal, Expr*, Stmt*,
11531153
}
11541154

11551155
Expr *visitRegexLiteralExpr(RegexLiteralExpr *E) {
1156-
if (auto *newExpr = doIt(E->getSemanticExpr())) {
1157-
E->setSemanticExpr(newExpr);
1158-
} else {
1159-
return nullptr;
1160-
}
11611156
return E;
11621157
}
11631158

lib/AST/Expr.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2212,9 +2212,8 @@ SourceLoc TapExpr::getEndLoc() const {
22122212

22132213
RegexLiteralExpr *
22142214
RegexLiteralExpr::createParsed(ASTContext &ctx, SourceLoc loc,
2215-
StringRef regexText, Expr *semanticExpr) {
2216-
return new (ctx) RegexLiteralExpr(loc, regexText, semanticExpr,
2217-
/*implicit*/ false);
2215+
StringRef regexText) {
2216+
return new (ctx) RegexLiteralExpr(loc, regexText, /*implicit*/ false);
22182217
}
22192218

22202219
void swift::simple_display(llvm::raw_ostream &out, const ClosureExpr *CE) {

lib/Frontend/Frontend.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,10 @@ bool CompilerInvocation::shouldImportSwiftConcurrency() const {
793793
FrontendOptions::ParseInputMode::SwiftModuleInterface;
794794
}
795795

796+
bool CompilerInvocation::shouldImportSwiftStringProcessing() const {
797+
return getLangOptions().EnableExperimentalStringProcessing;
798+
}
799+
796800
/// Implicitly import the SwiftOnoneSupport module in non-optimized
797801
/// builds. This allows for use of popular specialized functions
798802
/// from the standard library, which makes the non-optimized builds
@@ -833,6 +837,20 @@ bool CompilerInstance::canImportSwiftConcurrency() const {
833837
{getASTContext().getIdentifier(SWIFT_CONCURRENCY_NAME), SourceLoc()});
834838
}
835839

840+
void CompilerInstance::verifyImplicitStringProcessingImport() {
841+
if (Invocation.shouldImportSwiftStringProcessing() &&
842+
!canImportSwiftStringProcessing()) {
843+
Diagnostics.diagnose(SourceLoc(),
844+
diag::warn_implicit_string_processing_import_failed);
845+
}
846+
}
847+
848+
bool CompilerInstance::canImportSwiftStringProcessing() const {
849+
return getASTContext().canImportModule(
850+
{getASTContext().getIdentifier(SWIFT_STRING_PROCESSING_NAME),
851+
SourceLoc()});
852+
}
853+
836854
ImplicitImportInfo CompilerInstance::getImplicitImportInfo() const {
837855
auto &frontendOpts = Invocation.getFrontendOptions();
838856

@@ -873,6 +891,19 @@ ImplicitImportInfo CompilerInstance::getImplicitImportInfo() const {
873891
}
874892
}
875893

894+
if (Invocation.shouldImportSwiftStringProcessing()) {
895+
switch (imports.StdlibKind) {
896+
case ImplicitStdlibKind::Builtin:
897+
case ImplicitStdlibKind::None:
898+
break;
899+
900+
case ImplicitStdlibKind::Stdlib:
901+
if (canImportSwiftStringProcessing())
902+
pushImport(SWIFT_STRING_PROCESSING_NAME);
903+
break;
904+
}
905+
}
906+
876907
imports.ShouldImportUnderlyingModule = frontendOpts.ImportUnderlyingModule;
877908
imports.BridgingHeaderPath = frontendOpts.ImplicitObjCHeaderPath;
878909
return imports;
@@ -1084,6 +1115,7 @@ bool CompilerInstance::loadStdlibIfNeeded() {
10841115
}
10851116

10861117
verifyImplicitConcurrencyImport();
1118+
verifyImplicitStringProcessingImport();
10871119

10881120
// If we failed to load, we should have already diagnosed.
10891121
if (M->failedToLoad()) {

lib/Parse/ParseRegex.cpp

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -54,30 +54,6 @@ ParserResult<Expr> Parser::parseExprRegexLiteral() {
5454
diagnose(Tok, diag::regex_literal_parsing_error, errorStr);
5555

5656
auto loc = consumeToken();
57-
58-
// Create an implicit .init(_regexString: "<regex text>") call to serve as the
59-
// semantic expression for the regex. The type-checker will provide it with
60-
// the correct contextual type. We delay the contextual type for a couple of
61-
// reasons:
62-
// 1. We need to delay type lookup until after parsing.
63-
// 2. Even if the AST synthesis were done lazily in e.g a request, we don't
64-
// currently have great support for implicit TypeExprs for unopened generic
65-
// types, as we want to phase out the use of UnboundGenericType. The Regex
66-
// type isn't currently generic, but might be in the future.
67-
auto *regexStringExpr =
68-
new (Context) StringLiteralExpr(Context.AllocateCopy(regexText), loc);
69-
regexStringExpr->setImplicit();
70-
71-
DeclName initName(Context, DeclBaseName::createConstructor(),
72-
{Context.Id__regexString});
73-
DeclNameRef initNameRef(initName);
74-
auto *dotInit = new (Context) UnresolvedMemberExpr(
75-
/*dotLoc*/ loc, DeclNameLoc(loc), initNameRef, /*implicit*/ true);
76-
77-
auto *args =
78-
ArgumentList::forImplicitCallTo(initNameRef, {regexStringExpr}, Context);
79-
auto *call = CallExpr::createImplicit(Context, dotInit, args);
80-
8157
return makeParserResult(
82-
RegexLiteralExpr::createParsed(Context, loc, regexText, call));
58+
RegexLiteralExpr::createParsed(Context, loc, regexText));
8359
}

lib/SILGen/SILGenApply.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1896,6 +1896,25 @@ buildBuiltinLiteralArgs(SILGenFunction &SGF, SGFContext C,
18961896
return builtinLiteralArgs;
18971897
}
18981898

1899+
static inline PreparedArguments
1900+
buildBuiltinLiteralArgs(SILGenFunction &SGF, SGFContext C,
1901+
RegexLiteralExpr *expr) {
1902+
auto &ctx = SGF.getASTContext();
1903+
// %0 = string_literal <regex text>
1904+
auto strLiteralArgs = emitStringLiteral(SGF, expr, expr->getRegexText(), C,
1905+
StringLiteralExpr::Encoding::UTF8);
1906+
// %1 = function_ref String.init(
1907+
// _builtinStringLiteral:utf8CodeUnitCount:isASCII:)
1908+
// %2 = apply %1(%0, ..., ...) -> $String
1909+
auto strInitDecl = ctx.getStringBuiltinInitDecl(ctx.getStringDecl());
1910+
RValue string = SGF.emitApplyAllocatingInitializer(
1911+
expr, strInitDecl, std::move(strLiteralArgs),
1912+
/*overriddenSelfType*/ Type(), SGFContext());
1913+
PreparedArguments args({AnyFunctionType::Param(ctx.getStringType())});
1914+
args.add(expr, std::move(string));
1915+
return args;
1916+
}
1917+
18991918
static inline PreparedArguments
19001919
buildBuiltinLiteralArgs(SILGenFunction &SGF, SGFContext C,
19011920
MagicIdentifierLiteralExpr *magicLiteral) {
@@ -1961,6 +1980,8 @@ static inline PreparedArguments buildBuiltinLiteralArgs(SILGenFunction &SGF,
19611980
return buildBuiltinLiteralArgs(SGF, C, integerLiteral);
19621981
} else if (auto floatLiteral = dyn_cast<FloatLiteralExpr>(literal)) {
19631982
return buildBuiltinLiteralArgs(SGF, C, floatLiteral);
1983+
} else if (auto regexLiteral = dyn_cast<RegexLiteralExpr>(literal)) {
1984+
return buildBuiltinLiteralArgs(SGF, C, regexLiteral);
19641985
} else {
19651986
return buildBuiltinLiteralArgs(
19661987
SGF, C, cast<MagicIdentifierLiteralExpr>(literal));

lib/SILGen/SILGenExpr.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2528,7 +2528,7 @@ visitInterpolatedStringLiteralExpr(InterpolatedStringLiteralExpr *E,
25282528
}
25292529

25302530
RValue RValueEmitter::visitRegexLiteralExpr(RegexLiteralExpr *E, SGFContext C) {
2531-
return SGF.emitRValue(E->getSemanticExpr());
2531+
return SGF.emitLiteral(E, C);
25322532
}
25332533

25342534
RValue RValueEmitter::

lib/Sema/CSApply.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2792,7 +2792,10 @@ namespace {
27922792
}
27932793

27942794
Expr *visitRegexLiteralExpr(RegexLiteralExpr *expr) {
2795-
return simplifyExprType(expr);
2795+
simplifyExprType(expr);
2796+
expr->setInitializer(
2797+
cs.getASTContext().getRegexInitDecl(cs.getType(expr)));
2798+
return expr;
27962799
}
27972800

27982801
Expr *visitMagicIdentifierLiteralExpr(MagicIdentifierLiteralExpr *expr) {

0 commit comments

Comments
 (0)