Skip to content

Commit 7c3f965

Browse files
authored
Merge pull request #76979 from hamishknight/regex-request
Requestify regex pattern parsing
2 parents 4afc2d4 + 6a43596 commit 7c3f965

File tree

17 files changed

+211
-242
lines changed

17 files changed

+211
-242
lines changed

include/swift/AST/ASTBridging.h

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,17 +1412,11 @@ BridgedPrefixUnaryExpr
14121412
BridgedPrefixUnaryExpr_createParsed(BridgedASTContext cContext,
14131413
BridgedExpr oper, BridgedExpr operand);
14141414

1415-
SWIFT_NAME("BridgedRegexLiteralExpr."
1416-
"allocateCaptureStructureSerializationBuffer(_:size:)")
1417-
BridgedData BridgedRegexLiteralExpr_allocateCaptureStructureSerializationBuffer(
1418-
BridgedASTContext cContext, SwiftInt size);
1419-
1420-
SWIFT_NAME("BridgedRegexLiteralExpr.createParsed(_:loc:regexText:version:"
1421-
"captureStructure:)")
1422-
BridgedRegexLiteralExpr BridgedRegexLiteralExpr_createParsed(
1423-
BridgedASTContext cContext, BridgedSourceLoc cLoc,
1424-
BridgedStringRef cRegexText, SwiftInt version,
1425-
BridgedData cCaptureStructure);
1415+
SWIFT_NAME("BridgedRegexLiteralExpr.createParsed(_:loc:regexText:)")
1416+
BridgedRegexLiteralExpr
1417+
BridgedRegexLiteralExpr_createParsed(BridgedASTContext cContext,
1418+
BridgedSourceLoc cLoc,
1419+
BridgedStringRef cRegexText);
14261420

14271421
SWIFT_NAME("BridgedSequenceExpr.createParsed(_:exprs:)")
14281422
BridgedSequenceExpr BridgedSequenceExpr_createParsed(BridgedASTContext cContext,

include/swift/AST/Expr.h

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -994,39 +994,32 @@ class InterpolatedStringLiteralExpr : public LiteralExpr {
994994

995995
/// A regular expression literal e.g '(a|c)*'.
996996
class RegexLiteralExpr : public LiteralExpr {
997+
ASTContext *Ctx;
997998
SourceLoc Loc;
998-
StringRef RegexText;
999-
unsigned Version;
1000-
ArrayRef<uint8_t> SerializedCaptureStructure;
999+
StringRef ParsedRegexText;
10011000

1002-
RegexLiteralExpr(SourceLoc loc, StringRef regexText, unsigned version,
1003-
ArrayRef<uint8_t> serializedCaps,
1001+
RegexLiteralExpr(ASTContext *ctx, SourceLoc loc, StringRef parsedRegexText,
10041002
bool isImplicit)
1005-
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Loc(loc),
1006-
RegexText(regexText), Version(version),
1007-
SerializedCaptureStructure(serializedCaps) {}
1003+
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Ctx(ctx), Loc(loc),
1004+
ParsedRegexText(parsedRegexText) {}
10081005

10091006
public:
1010-
static RegexLiteralExpr *createParsed(
1011-
ASTContext &ctx, SourceLoc loc, StringRef regexText, unsigned version,
1012-
ArrayRef<uint8_t> serializedCaptureStructure);
1007+
static RegexLiteralExpr *createParsed(ASTContext &ctx, SourceLoc loc,
1008+
StringRef regexText);
10131009

1014-
typedef uint16_t CaptureStructureSerializationVersion;
1010+
ASTContext &getASTContext() const { return *Ctx; }
10151011

1016-
static unsigned getCaptureStructureSerializationAllocationSize(
1017-
unsigned regexLength) {
1018-
return sizeof(CaptureStructureSerializationVersion) + regexLength + 1;
1019-
}
1012+
/// Retrieve the raw parsed regex text.
1013+
StringRef getParsedRegexText() const { return ParsedRegexText; }
10201014

1021-
/// Retrieve the raw regex text.
1022-
StringRef getRegexText() const { return RegexText; }
1015+
/// Retrieve the regex pattern to emit.
1016+
StringRef getRegexToEmit() const;
10231017

1024-
/// Retrieve the version of the regex string.
1025-
unsigned getVersion() const { return Version; }
1018+
/// Retrieve the computed type for the regex.
1019+
Type getRegexType() const;
10261020

1027-
ArrayRef<uint8_t> getSerializedCaptureStructure() {
1028-
return SerializedCaptureStructure;
1029-
}
1021+
/// Retrieve the version of the regex string.
1022+
unsigned getVersion() const;
10301023

10311024
SourceRange getSourceRange() const { return Loc; }
10321025

@@ -6558,9 +6551,8 @@ void simple_display(llvm::raw_ostream &out, const ClosureExpr *CE);
65586551
void simple_display(llvm::raw_ostream &out, const DefaultArgumentExpr *expr);
65596552
void simple_display(llvm::raw_ostream &out, const Expr *expr);
65606553

6561-
SourceLoc extractNearestSourceLoc(const DefaultArgumentExpr *expr);
6562-
SourceLoc extractNearestSourceLoc(const MacroExpansionExpr *expr);
65636554
SourceLoc extractNearestSourceLoc(const ClosureExpr *expr);
6555+
SourceLoc extractNearestSourceLoc(const Expr *expr);
65646556

65656557
} // end namespace swift
65666558

include/swift/AST/TypeCheckRequests.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5087,6 +5087,31 @@ class SuppressesConformanceRequest
50875087
bool isCached() const { return true; }
50885088
};
50895089

5090+
struct RegexLiteralPatternInfo {
5091+
StringRef RegexToEmit;
5092+
Type RegexType;
5093+
size_t Version;
5094+
};
5095+
5096+
/// Parses the regex pattern for a given regex literal using the
5097+
/// compiler's regex parsing library, and returns the resulting info.
5098+
class RegexLiteralPatternInfoRequest
5099+
: public SimpleRequest<RegexLiteralPatternInfoRequest,
5100+
RegexLiteralPatternInfo(const RegexLiteralExpr *),
5101+
RequestFlags::Cached> {
5102+
public:
5103+
using SimpleRequest::SimpleRequest;
5104+
5105+
private:
5106+
friend SimpleRequest;
5107+
5108+
RegexLiteralPatternInfo evaluate(Evaluator &evaluator,
5109+
const RegexLiteralExpr *regex) const;
5110+
5111+
public:
5112+
bool isCached() const { return true; }
5113+
};
5114+
50905115
class IsUnsafeRequest
50915116
: public SimpleRequest<IsUnsafeRequest,
50925117
bool(Decl *decl),

include/swift/AST/TypeCheckerTypeIDZone.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,10 @@ SWIFT_REQUEST(TypeChecker, SuppressesConformanceRequest,
589589
bool(NominalTypeDecl *decl, KnownProtocolKind kp),
590590
SeparatelyCached, NoLocationInfo)
591591

592+
SWIFT_REQUEST(TypeChecker, RegexLiteralPatternInfoRequest,
593+
RegexLiteralPatternInfo(const RegexLiteralExpr *),
594+
Cached, NoLocationInfo)
595+
592596
SWIFT_REQUEST(TypeChecker, CaptureInfoRequest,
593597
CaptureInfo(AbstractFunctionDecl *),
594598
SeparatelyCached, NoLocationInfo)

lib/AST/ASTDumper.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2280,8 +2280,8 @@ class PrintExpr : public ExprVisitor<PrintExpr, void, StringRef>,
22802280
}
22812281
void visitRegexLiteralExpr(RegexLiteralExpr *E, StringRef label) {
22822282
printCommon(E, "regex_literal_expr", label);
2283-
2284-
printFieldQuoted(E->getRegexText(), "text", LiteralValueColor);
2283+
2284+
printFieldQuoted(E->getParsedRegexText(), "text", LiteralValueColor);
22852285
printInitializerField(E->getInitializer(), "initializer");
22862286

22872287
printFoot();

lib/AST/ASTPrinter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4746,7 +4746,7 @@ void PrintAST::visitBooleanLiteralExpr(BooleanLiteralExpr *expr) {
47464746
}
47474747

47484748
void PrintAST::visitRegexLiteralExpr(RegexLiteralExpr *expr) {
4749-
Printer << expr->getRegexText();
4749+
Printer << expr->getParsedRegexText();
47504750
}
47514751

47524752
void PrintAST::visitErrorExpr(ErrorExpr *expr) {

lib/AST/Bridging/ExprBridging.cpp

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,6 @@ BridgedUnresolvedSpecializeExpr BridgedUnresolvedSpecializeExpr_createParsed(
241241
BridgedASTContext cContext, BridgedExpr cSubExpr,
242242
BridgedSourceLoc cLAngleLoc, BridgedArrayRef cArguments,
243243
BridgedSourceLoc cRAngleLoc) {
244-
245-
ASTContext &context = cContext.unbridged();
246244
return UnresolvedSpecializeExpr::create(
247245
cContext.unbridged(), cSubExpr.unbridged(), cLAngleLoc.unbridged(),
248246
cArguments.unbridged<TypeRepr *>(), cRAngleLoc.unbridged());
@@ -341,25 +339,12 @@ BridgedPrefixUnaryExpr_createParsed(BridgedASTContext cContext,
341339
operand.unbridged());
342340
}
343341

344-
BridgedData BridgedRegexLiteralExpr_allocateCaptureStructureSerializationBuffer(
345-
BridgedASTContext cContext, SwiftInt size) {
346-
auto buf = cContext.unbridged().AllocateUninitialized<uint8_t>(
347-
RegexLiteralExpr::getCaptureStructureSerializationAllocationSize(
348-
unsigned(size)));
349-
return BridgedData(reinterpret_cast<const char *>(buf.data()), buf.size());
350-
}
351-
352-
BridgedRegexLiteralExpr BridgedRegexLiteralExpr_createParsed(
353-
BridgedASTContext cContext, BridgedSourceLoc cLoc,
354-
BridgedStringRef cRegexText, SwiftInt version,
355-
BridgedData cCaptureStructure) {
356-
ArrayRef<uint8_t> captures(
357-
reinterpret_cast<const uint8_t *>(cCaptureStructure.BaseAddress),
358-
cCaptureStructure.Length);
359-
342+
BridgedRegexLiteralExpr
343+
BridgedRegexLiteralExpr_createParsed(BridgedASTContext cContext,
344+
BridgedSourceLoc cLoc,
345+
BridgedStringRef cRegexText) {
360346
return RegexLiteralExpr::createParsed(cContext.unbridged(), cLoc.unbridged(),
361-
cRegexText.unbridged(),
362-
unsigned(version), captures);
347+
cRegexText.unbridged());
363348
}
364349

365350
BridgedSequenceExpr BridgedSequenceExpr_createParsed(BridgedASTContext cContext,

lib/AST/Expr.cpp

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2749,12 +2749,27 @@ SourceRange TapExpr::getSourceRange() const {
27492749
Body->getSourceRange());
27502750
}
27512751

2752-
RegexLiteralExpr *
2753-
RegexLiteralExpr::createParsed(ASTContext &ctx, SourceLoc loc,
2754-
StringRef regexText, unsigned version,
2755-
ArrayRef<uint8_t> serializedCaps) {
2756-
return new (ctx) RegexLiteralExpr(loc, regexText, version, serializedCaps,
2757-
/*implicit*/ false);
2752+
RegexLiteralExpr *RegexLiteralExpr::createParsed(ASTContext &ctx, SourceLoc loc,
2753+
StringRef regexText) {
2754+
return new (ctx) RegexLiteralExpr(&ctx, loc, regexText, /*implicit*/ false);
2755+
}
2756+
2757+
StringRef RegexLiteralExpr::getRegexToEmit() const {
2758+
auto &eval = getASTContext().evaluator;
2759+
return evaluateOrDefault(eval, RegexLiteralPatternInfoRequest{this}, {})
2760+
.RegexToEmit;
2761+
}
2762+
2763+
Type RegexLiteralExpr::getRegexType() const {
2764+
auto &eval = getASTContext().evaluator;
2765+
return evaluateOrDefault(eval, RegexLiteralPatternInfoRequest{this}, {})
2766+
.RegexType;
2767+
}
2768+
2769+
unsigned RegexLiteralExpr::getVersion() const {
2770+
auto &eval = getASTContext().evaluator;
2771+
return evaluateOrDefault(eval, RegexLiteralPatternInfoRequest{this}, {})
2772+
.Version;
27582773
}
27592774

27602775
TypeJoinExpr::TypeJoinExpr(llvm::PointerUnion<DeclRefExpr *, TypeBase *> result,
@@ -2860,11 +2875,7 @@ SourceLoc swift::extractNearestSourceLoc(const ClosureExpr *expr) {
28602875
return expr->getLoc();
28612876
}
28622877

2863-
SourceLoc swift::extractNearestSourceLoc(const DefaultArgumentExpr *expr) {
2864-
return expr->getLoc();
2865-
}
2866-
2867-
SourceLoc swift::extractNearestSourceLoc(const MacroExpansionExpr *expr) {
2878+
SourceLoc swift::extractNearestSourceLoc(const Expr *expr) {
28682879
return expr->getLoc();
28692880
}
28702881

lib/ASTGen/Sources/ASTGen/Exprs.swift

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ extension ASTGenVisitor {
154154
case .prefixOperatorExpr(let node):
155155
return self.generate(prefixOperatorExpr: node).asExpr
156156
case .regexLiteralExpr(let node):
157-
return self.generate(regexLiteralExpr: node)
157+
return self.generate(regexLiteralExpr: node).asExpr
158158
case .sequenceExpr(let node):
159159
return self.generate(sequenceExpr: node)
160160
case .simpleStringLiteralExpr:
@@ -605,6 +605,19 @@ extension ASTGenVisitor {
605605
)
606606
}
607607

608+
func generate(regexLiteralExpr node: RegexLiteralExprSyntax) -> BridgedRegexLiteralExpr {
609+
// Copy the regex string to the ASTContext.
610+
var str = node.trimmedDescription
611+
let regexText = str.withBridgedString {
612+
self.ctx.allocateCopy(string: $0)
613+
}
614+
return .createParsed(
615+
self.ctx,
616+
loc: self.generateSourceLoc(node),
617+
regexText: regexText
618+
)
619+
}
620+
608621
func generate(sequenceExpr node: SequenceExprSyntax) -> BridgedExpr {
609622
assert(
610623
!node.elements.count.isMultiple(of: 2),

lib/ASTGen/Sources/ASTGen/Regex.swift

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -18,56 +18,6 @@ import SwiftSyntax
1818
#if canImport(_CompilerRegexParser)
1919
@_spi(CompilerInterface) import _CompilerRegexParser
2020

21-
extension ASTGenVisitor {
22-
func generate(regexLiteralExpr node: RegexLiteralExprSyntax) -> BridgedExpr {
23-
let str = node.trimmedDescription
24-
let captureBuffer = BridgedRegexLiteralExpr
25-
.allocateCaptureStructureSerializationBuffer(self.ctx, size: str.utf8.count)
26-
let captureBufferOut = UnsafeMutableRawBufferPointer(
27-
start: UnsafeMutableRawPointer(mutating: captureBuffer.baseAddress),
28-
count: captureBuffer.count
29-
)
30-
31-
let loc = self.generateSourceLoc(node);
32-
33-
do {
34-
// FIXME: We need to plumb through the 'regexToEmit' result to the caller.
35-
// For now, it is the same as the input.
36-
var regexToEmit: String
37-
let version: Int
38-
(regexToEmit, version) = try swiftCompilerParseRegexLiteral(
39-
str,
40-
captureBufferOut: captureBufferOut
41-
)
42-
// Copy the regex string to the ASTContext.
43-
let regexToEmitStr = regexToEmit.withBridgedString {
44-
self.ctx.allocateCopy(string: $0)
45-
}
46-
47-
return BridgedRegexLiteralExpr.createParsed(
48-
self.ctx,
49-
loc: loc,
50-
regexText: regexToEmitStr,
51-
version: version,
52-
captureStructure: captureBuffer
53-
).asExpr
54-
} catch let error as _CompilerRegexParser.CompilerParseError {
55-
let offset = error.location != nil ? str.utf8.offset(of: error.location!) : 0
56-
let position = node.positionAfterSkippingLeadingTrivia.advanced(by: offset)
57-
self.diagnose(
58-
Diagnostic(
59-
node: node.regex,
60-
position: position,
61-
message: RegexParserError(error.message)
62-
)
63-
)
64-
return BridgedErrorExpr.create(self.ctx, loc: BridgedSourceRange(start: loc, end: loc)).asExpr
65-
} catch {
66-
fatalError("Expected CompilerParseError")
67-
}
68-
}
69-
}
70-
7121
/// Bridging between C++ lexer and swiftCompilerLexRegexLiteral.
7222
///
7323
/// Attempt to lex a regex literal string.

lib/Parse/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ endif()
3939

4040
if(SWIFT_BUILD_REGEX_PARSER_IN_COMPILER)
4141
target_compile_definitions(swiftParse
42-
PRIVATE
42+
PUBLIC
4343
SWIFT_BUILD_REGEX_PARSER_IN_COMPILER
4444
)
4545
endif()

lib/Parse/ParseRegex.cpp

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,41 +14,15 @@
1414
//
1515
//===----------------------------------------------------------------------===//
1616

17-
#include "swift/AST/DiagnosticsParse.h"
18-
#include "swift/Basic/Assertions.h"
19-
#include "swift/Bridging/ASTGen.h"
2017
#include "swift/Parse/Parser.h"
2118

2219
using namespace swift;
2320

2421
ParserResult<Expr> Parser::parseExprRegexLiteral() {
2522
assert(Tok.is(tok::regex_literal));
26-
27-
#if SWIFT_BUILD_REGEX_PARSER_IN_COMPILER
2823
auto regexText = Tok.getText();
29-
30-
// Let the Swift library parse the contents, returning an error, or null if
31-
// successful.
32-
size_t version = 0;
33-
auto capturesBuf = Context.AllocateUninitialized<uint8_t>(
34-
RegexLiteralExpr::getCaptureStructureSerializationAllocationSize(
35-
regexText.size()));
36-
bool hadError = swift_ASTGen_parseRegexLiteral(
37-
regexText,
38-
/*versionOut=*/&version,
39-
/*captureStructureOut=*/capturesBuf.data(),
40-
/*captureStructureSize=*/capturesBuf.size(),
41-
/*diagBaseLoc=*/Tok.getLoc(), &Diags);
4224
auto loc = consumeToken();
4325
SourceMgr.recordRegexLiteralStartLoc(loc);
44-
45-
if (hadError) {
46-
return makeParserResult(new (Context) ErrorExpr(loc));
47-
}
48-
assert(version >= 1);
49-
return makeParserResult(RegexLiteralExpr::createParsed(
50-
Context, loc, regexText, version, capturesBuf));
51-
#else
52-
llvm_unreachable("Lexer should not emit tok::regex_literal");
53-
#endif
26+
return makeParserResult(
27+
RegexLiteralExpr::createParsed(Context, loc, regexText));
5428
}

lib/SILGen/SILGenApply.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2130,8 +2130,8 @@ buildBuiltinLiteralArgs(SILGenFunction &SGF, SGFContext C,
21302130
RegexLiteralExpr *expr) {
21312131
auto &ctx = SGF.getASTContext();
21322132
// %0 = string_literal <regex text>
2133-
auto strLiteralArgs = emitStringLiteralArgs(SGF, expr, expr->getRegexText(), C,
2134-
StringLiteralExpr::Encoding::UTF8);
2133+
auto strLiteralArgs = emitStringLiteralArgs(
2134+
SGF, expr, expr->getRegexToEmit(), C, StringLiteralExpr::Encoding::UTF8);
21352135
// %1 = function_ref String.init(
21362136
// _builtinStringLiteral:utf8CodeUnitCount:isASCII:)
21372137
// %2 = apply %1(%0, ..., ...) -> $String

0 commit comments

Comments
 (0)