
Commit 60c78af

Merge pull request #40065 from ahoppen/pr/lexing-cutoff
[Parser] Don't modify the current token kind when cutting off parsing
2 parents 5b6dbd2 + b888dc0

10 files changed: 863 additions, 76 deletions

include/swift/Parse/Lexer.h

Lines changed: 27 additions & 0 deletions

@@ -133,6 +133,11 @@ class Lexer {
   /// the next token doesn't have a comment.
   const char *CommentStart;

+  /// If this is not \c nullptr, all tokens after this point are treated as eof.
+  /// Used to cut off lexing early when we detect that the nesting level is too
+  /// deep.
+  const char *LexerCutOffPoint = nullptr;
+
   Lexer(const Lexer&) = delete;
   void operator=(const Lexer&) = delete;

@@ -222,6 +227,28 @@ class Lexer {
     lexImpl();
   }

+  /// Cut off lexing at the current position. The next token to be lexed will
+  /// be an EOF token, even if there is still source code to be lexed.
+  /// The current and next token (returned by \c peekNextToken) are not
+  /// modified. The token after \c NextToken will be the EOF token.
+  void cutOffLexing() {
+    // If we already have a cut off point, don't push it further towards the
+    // back.
+    if (LexerCutOffPoint == nullptr || LexerCutOffPoint >= CurPtr) {
+      LexerCutOffPoint = CurPtr;
+    }
+  }
+
+  /// If a lexer cut off point has been set, returns the offset in the buffer at
+  /// which lexing is being cut off.
+  Optional<size_t> lexingCutOffOffset() const {
+    if (LexerCutOffPoint) {
+      return LexerCutOffPoint - BufferStart;
+    } else {
+      return None;
+    }
+  }
+
   bool isKeepingComments() const {
     return RetainComments == CommentRetentionMode::ReturnAsTokens;
   }
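
As a usage sketch (the helper below is hypothetical and not part of this commit), the two new entry points are meant to compose like this:

// Illustration only: `bailOutIfTooDeep` does not exist in the tree.
// Once a caller decides the input nests too deeply, it freezes the lexer:
// every token lexed past this point is synthesized as tok::eof, while the
// already-lexed current and peeked tokens keep their original kinds.
static void bailOutIfTooDeep(swift::Lexer &L, unsigned Depth, unsigned MaxDepth) {
  if (Depth > MaxDepth)
    L.cutOffLexing();

  // Clients that re-lex a sub-range of the buffer (such as the TokenRecorder
  // changed in lib/Parse/Parser.cpp below) can clamp their end offset so they
  // never read past the cut-off point.
  if (auto CutOffOffset = L.lexingCutOffOffset()) {
    (void)CutOffOffset; // e.g. EndOffset = std::min(EndOffset, *CutOffOffset);
  }
}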

include/swift/Parse/Parser.h

Lines changed: 5 additions & 25 deletions

@@ -221,12 +221,6 @@ class Parser {
   /// The location of the previous token.
   SourceLoc PreviousLoc;

-  /// Stop parsing immediately.
-  void cutOffParsing() {
-    // Cut off parsing by acting as if we reached the end-of-file.
-    Tok.setKind(tok::eof);
-  }
-
   /// Use this to assert that the parser has advanced the lexing location, e.g.
   /// before a specific parser function has returned.
   class AssertParserMadeProgressBeforeLeavingScopeRAII {

@@ -329,35 +323,21 @@ class Parser {

   /// An RAII object that notes when we have seen a structure marker.
   class StructureMarkerRAII {
-    Parser *const P;
+    Parser &P;

     /// Max nesting level
     // TODO: customizable.
     enum { MaxDepth = 256 };

-    StructureMarkerRAII(Parser *parser) : P(parser) {}
-
-    /// Have the parser start the new Structure or fail if already too deep.
-    bool pushStructureMarker(Parser &parser, SourceLoc loc,
-                             StructureMarkerKind kind);
+    StructureMarkerRAII(Parser &parser) : P(parser) {}

   public:
-    StructureMarkerRAII(Parser &parser, SourceLoc loc, StructureMarkerKind kind)
-        : StructureMarkerRAII(
-              pushStructureMarker(parser, loc, kind) ? &parser : nullptr) {}
+    StructureMarkerRAII(Parser &parser, SourceLoc loc,
+                        StructureMarkerKind kind);

     StructureMarkerRAII(Parser &parser, const Token &tok);

-    /// Did we fail to push the new structure?
-    bool isFailed() {
-      return P == nullptr;
-    }
-
-    ~StructureMarkerRAII() {
-      if (P != nullptr) {
-        P->StructureMarkers.pop_back();
-      }
-    }
+    ~StructureMarkerRAII() { P.StructureMarkers.pop_back(); }
   };
   friend class StructureMarkerRAII;
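
Since the RAII type no longer carries a failure state, its call sites shrink to a single declaration. A simplified before/after sketch of the pattern (hypothetical Parser members, not code from the tree; the real call sites follow below):

// Before: each caller had to check whether pushing the marker failed.
ParserStatus Parser::parseSomethingOld() {
  StructureMarkerRAII Marker(*this, Tok);
  if (Marker.isFailed())
    return makeParserError();
  // ... parse the construct ...
  return makeParserSuccess();
}

// After: constructing the marker is enough. On overflow the constructor emits
// the diagnostic and cuts off lexing, so the caller keeps parsing and unwinds
// naturally at the synthesized eof.
ParserStatus Parser::parseSomethingNew() {
  StructureMarkerRAII Marker(*this, Tok);
  // ... parse the construct ...
  return makeParserSuccess();
}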

lib/Parse/Lexer.cpp

Lines changed: 5 additions & 1 deletion

@@ -2349,7 +2349,11 @@ void Lexer::lexImpl() {

   // Remember the start of the token so we can form the text range.
   const char *TokStart = CurPtr;
-
+
+  if (LexerCutOffPoint && CurPtr >= LexerCutOffPoint) {
+    return formToken(tok::eof, TokStart);
+  }
+
   switch (*CurPtr++) {
   default: {
     char const *Tmp = CurPtr-1;
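
The effect of the new guard, sketched for a hypothetical swift::Lexer L whose cutOffLexing() has already been called: every lex() call at or past the cut-off point forms an eof token, so ordinary consume-until-eof loops still terminate even though source text remains in the buffer.

// Illustration only; assumes a Lexer `L` that was cut off somewhere before
// the end of its buffer.
swift::Token Tok;
do {
  L.lex(Tok); // at or past the cut-off point this always yields tok::eof
} while (Tok.isNot(tok::eof));
// The loop exits without ever materializing a real token past the cut-off.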

lib/Parse/ParseExpr.cpp

Lines changed: 0 additions & 4 deletions

@@ -3121,10 +3121,6 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok,
                                    SourceLoc &rightLoc, SyntaxKind Kind) {
   StructureMarkerRAII ParsingExprList(*this, Tok);

-  if (ParsingExprList.isFailed()) {
-    return makeParserError();
-  }
-
   leftLoc = consumeToken(leftTok);
   return parseList(rightTok, leftLoc, rightLoc, /*AllowSepAfterLast=*/false,
                    rightTok == tok::r_paren ? diag::expected_rparen_expr_list

lib/Parse/ParseGeneric.cpp

Lines changed: 0 additions & 4 deletions

@@ -57,10 +57,6 @@ Parser::parseGenericParametersBeforeWhere(SourceLoc LAngleLoc,
   // Note that we're parsing a declaration.
   StructureMarkerRAII ParsingDecl(*this, Tok.getLoc(),
                                   StructureMarkerKind::Declaration);
-
-  if (ParsingDecl.isFailed()) {
-    return makeParserError();
-  }

   // Parse attributes.
   DeclAttributes attributes;

lib/Parse/ParsePattern.cpp

Lines changed: 0 additions & 3 deletions

@@ -1216,9 +1216,6 @@ ParserResult<Pattern> Parser::parsePatternTuple() {
   SyntaxParsingContext TuplePatternCtxt(SyntaxContext,
                                         SyntaxKind::TuplePattern);
   StructureMarkerRAII ParsingPatternTuple(*this, Tok);
-  if (ParsingPatternTuple.isFailed()) {
-    return makeParserError();
-  }
   SourceLoc LPLoc = consumeToken(tok::l_paren);
   SourceLoc RPLoc;


lib/Parse/ParseStmt.cpp

Lines changed: 0 additions & 4 deletions

@@ -1280,10 +1280,6 @@ ParserResult<PoundAvailableInfo> Parser::parseStmtConditionPoundAvailable() {

   StructureMarkerRAII ParsingAvailabilitySpecList(*this, Tok);

-  if (ParsingAvailabilitySpecList.isFailed()) {
-    return makeParserError();
-  }
-
   SourceLoc LParenLoc = consumeToken(tok::l_paren);

   SmallVector<AvailabilitySpec *, 5> Specs;

lib/Parse/ParseType.cpp

Lines changed: 0 additions & 4 deletions

@@ -1017,10 +1017,6 @@ ParserResult<TypeRepr> Parser::parseTypeTupleBody() {
   TypeContext.setCreateSyntax(SyntaxKind::TupleType);
   Parser::StructureMarkerRAII ParsingTypeTuple(*this, Tok);

-  if (ParsingTypeTuple.isFailed()) {
-    return makeParserError();
-  }
-
   SourceLoc RPLoc, LPLoc = consumeToken(tok::l_paren);
   SourceLoc EllipsisLoc;
   unsigned EllipsisIdx;

lib/Parse/Parser.cpp

Lines changed: 34 additions & 31 deletions

@@ -405,6 +405,9 @@ namespace {
 /// underlying corrected token stream.
 class TokenRecorder: public ConsumeTokenReceiver {
   ASTContext &Ctx;
+  /// The lexer that is being used to lex the source file. Used to query whether
+  /// lexing has been cut off.
+  Lexer &BaseLexer;
   unsigned BufferID;

   // Token list ordered by their appearance in the source file.

@@ -425,11 +428,19 @@ class TokenRecorder: public ConsumeTokenReceiver {
   void relexComment(CharSourceRange CommentRange,
                     llvm::SmallVectorImpl<Token> &Scratch) {
     auto &SM = Ctx.SourceMgr;
+    auto EndOffset = SM.getLocOffsetInBuffer(CommentRange.getEnd(), BufferID);
+    if (auto LexerCutOffOffset = BaseLexer.lexingCutOffOffset()) {
+      if (*LexerCutOffOffset < EndOffset) {
+        // If lexing was cut off due to a too deep nesting level, adjust the end
+        // offset to not point past the cut off point.
+        EndOffset = *LexerCutOffOffset;
+      }
+    }
     Lexer L(Ctx.LangOpts, SM, BufferID, nullptr, LexerMode::Swift,
             HashbangMode::Disallowed, CommentRetentionMode::ReturnAsTokens,
             TriviaRetentionMode::WithoutTrivia,
             SM.getLocOffsetInBuffer(CommentRange.getStart(), BufferID),
-            SM.getLocOffsetInBuffer(CommentRange.getEnd(), BufferID));
+            EndOffset);
     while(true) {
       Token Result;
       L.lex(Result);

@@ -441,8 +452,8 @@ class TokenRecorder: public ConsumeTokenReceiver {
   }

 public:
-  TokenRecorder(ASTContext &ctx, unsigned BufferID)
-      : Ctx(ctx), BufferID(BufferID) {}
+  TokenRecorder(ASTContext &ctx, Lexer &BaseLexer)
+      : Ctx(ctx), BaseLexer(BaseLexer), BufferID(BaseLexer.getBufferID()) {}

   Optional<std::vector<Token>> finalize() override {
     auto &SM = Ctx.SourceMgr;

@@ -516,19 +527,14 @@ class TokenRecorder: public ConsumeTokenReceiver {
 Parser::Parser(std::unique_ptr<Lexer> Lex, SourceFile &SF,
                SILParserStateBase *SIL, PersistentParserState *PersistentState,
                std::shared_ptr<SyntaxParseActions> SPActions)
-    : SourceMgr(SF.getASTContext().SourceMgr),
-      Diags(SF.getASTContext().Diags),
-      SF(SF),
-      L(Lex.release()),
-      SIL(SIL),
-      CurDeclContext(&SF),
-      Context(SF.getASTContext()),
-      TokReceiver(SF.shouldCollectTokens() ?
-                  new TokenRecorder(SF.getASTContext(), L->getBufferID()) :
-                  new ConsumeTokenReceiver()),
-      SyntaxContext(new SyntaxParsingContext(SyntaxContext, SF,
-                                             L->getBufferID(),
-                                             std::move(SPActions))) {
+    : SourceMgr(SF.getASTContext().SourceMgr), Diags(SF.getASTContext().Diags),
+      SF(SF), L(Lex.release()), SIL(SIL), CurDeclContext(&SF),
+      Context(SF.getASTContext()),
+      TokReceiver(SF.shouldCollectTokens()
+                      ? new TokenRecorder(SF.getASTContext(), *L)
+                      : new ConsumeTokenReceiver()),
+      SyntaxContext(new SyntaxParsingContext(
+          SyntaxContext, SF, L->getBufferID(), std::move(SPActions))) {
   State = PersistentState;
   if (!State) {
     OwnedState.reset(new PersistentParserState());

@@ -880,28 +886,25 @@ getStructureMarkerKindForToken(const Token &tok) {
   }
 }

-Parser::StructureMarkerRAII::StructureMarkerRAII(Parser &parser,
-                                                 const Token &tok)
-    : StructureMarkerRAII(parser, tok.getLoc(),
-                          getStructureMarkerKindForToken(tok)) {}
-
-bool Parser::StructureMarkerRAII::pushStructureMarker(
-    Parser &parser, SourceLoc loc,
-    StructureMarkerKind kind) {
-
-  if (parser.StructureMarkers.size() < MaxDepth) {
-    parser.StructureMarkers.push_back({loc, kind, None});
-    return true;
-  } else {
+Parser::StructureMarkerRAII::StructureMarkerRAII(Parser &parser, SourceLoc loc,
+                                                 StructureMarkerKind kind)
+    : StructureMarkerRAII(parser) {
+  parser.StructureMarkers.push_back({loc, kind, None});
+  if (parser.StructureMarkers.size() > MaxDepth) {
     parser.diagnose(loc, diag::structure_overflow, MaxDepth);
     // We need to cut off parsing or we will stack-overflow.
     // But `cutOffParsing` changes the current token to eof, and we may be in
     // a place where `consumeToken()` will be expecting e.g. '[',
     // since we need that to get to the callsite, so this can cause an assert.
-    parser.cutOffParsing();
-    return false;
+    parser.L->cutOffLexing();
   }
 }
+
+Parser::StructureMarkerRAII::StructureMarkerRAII(Parser &parser,
+                                                 const Token &tok)
+    : StructureMarkerRAII(parser, tok.getLoc(),
+                          getStructureMarkerKindForToken(tok)) {}
+
 //===----------------------------------------------------------------------===//
 // Primitive Parsing
 //===----------------------------------------------------------------------===//
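
The comment retained in the new StructureMarkerRAII constructor is the heart of the change; here is a hedged sketch of the failure mode it describes (a hypothetical Parser member, not code from the tree):

// Why rewriting the current token was unsafe, in miniature.
ParserResult<Pattern> Parser::parsePatternTupleSketch() {
  // We only reached this call site because Tok is currently '('.
  StructureMarkerRAII ParsingPatternTuple(*this, Tok);
  // Old behavior: on overflow the constructor set Tok's kind to eof, so the
  // consumeToken(tok::l_paren) below tripped an assertion on the kind mismatch
  // unless the call site remembered to check isFailed() first.
  // New behavior: Tok is still '(' -- only tokens lexed later come back as
  // eof -- so the consume succeeds and the parse unwinds at the synthesized eof.
  SourceLoc LPLoc = consumeToken(tok::l_paren);
  (void)LPLoc;
  return makeParserError(); // sketch ends here; a real parser would keep going
}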
