Skip to content

Fix exp-time parse backtrack by saving if '[' starts Array or Dictionary #20502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions include/swift/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,75 @@ class Parser {
/// Current syntax parsing context where call backs should be directed to.
SyntaxParsingContext *SyntaxContext;

// Save the results of discovering whether a given '[' is the start of
// an Array or a Dictionary. Remembering this prevents
// `parseExprCollection()` from having to parse the first sub-expr in
// square brackets {2^n} times, where n is # of nested square brackets.
class ArrayOrDictStartLocCache {
typedef unsigned SquareBracketLoc;

llvm::DenseMap<SquareBracketLoc, bool> IsDictMap;
int UseCount;
unsigned BufferID;

public:
ArrayOrDictStartLocCache() : UseCount(0), BufferID(INT_MAX) {}

struct CachedVal {
bool IsCached;
bool IsDict;
CachedVal(bool IsCached, bool IsDict)
: IsCached(IsCached), IsDict(IsDict) {}
};

// Handle used by all the Parser::parseExprCollection() calls on the stack
class RAII {
Parser &P;
/// Translate a ParserPosition into something we can use as a map key.
SquareBracketLoc TranslatePos(SourceLoc Loc) {
// SourceLoc Loc = Pos.PreviousLoc;
assert(
P.BracketCache.BufferID ==
P.SourceMgr.findBufferContainingLoc(Loc) &&
"square-bracket sub-expressions are assumed not to cross files!");
return P.SourceMgr.getLocOffsetInBuffer(Loc, P.BracketCache.BufferID);
}

public:
RAII(Parser &P) : P(P) {
auto &cache = P.BracketCache;
if (cache.UseCount == 0) {
cache.BufferID = P.SourceMgr.findBufferContainingLoc(
P.getParserPosition().PreviousLoc);
}
++cache.UseCount;
}
~RAII() {
auto &cache = P.BracketCache;
--cache.UseCount;
if (cache.UseCount == 0) { // The topmost speculative parse has ended
cache.IsDictMap.shrink_and_clear();
cache.BufferID = INT_MAX;
}
}

void setIsDictStarting(SourceLoc Loc, bool IsDict) {
SquareBracketLoc SBLoc = TranslatePos(Loc);
P.BracketCache.IsDictMap[SBLoc] = IsDict;
}

CachedVal whatStartsAt(SourceLoc Loc) {
SquareBracketLoc SBLoc = TranslatePos(Loc);
auto i = P.BracketCache.IsDictMap.find(SBLoc);
if (i == P.BracketCache.IsDictMap.end())
return CachedVal(false, false);
else
return CachedVal(true, i->second);
}
};
};
ArrayOrDictStartLocCache BracketCache;

public:
Parser(unsigned BufferID, SourceFile &SF, DiagnosticEngine* LexerDiags,
SILParserTUStateBase *SIL,
Expand Down
8 changes: 8 additions & 0 deletions lib/Parse/ParseExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3382,6 +3382,13 @@ ParserResult<Expr> Parser::parseExprCollection() {
}

bool ParseDict;
ArrayOrDictStartLocCache::RAII SquareCacheRAII(*this);

ArrayOrDictStartLocCache::CachedVal v =
SquareCacheRAII.whatStartsAt(LSquareLoc);
if (v.IsCached) {
ParseDict = v.IsDict; // true for dict, false for array
} else // First time parsing this or neither dict nor array
{
BacktrackingScope Scope(*this);
auto HasDelayedDecl = State->hasDelayedDecl();
Expand All @@ -3400,6 +3407,7 @@ ParserResult<Expr> Parser::parseExprCollection() {
State->takeDelayedDeclState();
// If we have a ':', this is a dictionary literal.
ParseDict = Tok.is(tok::colon);
SquareCacheRAII.setIsDictStarting(LSquareLoc, ParseDict);
}

if (ParseDict) {
Expand Down
13 changes: 13 additions & 0 deletions test/Parse/structure_no_overflow.swift
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,16 @@ let c = ({({({({ ({({({([ ({({({({ ({({({([ ({({({({ ({({({([ ({({({({ ({({({({
})})})}) })})})}) ])})})}) })})})}) ])})})}) })})})}) ])})})}) })})})})
})})})}) })})})}) ])})})}) })})})}) ])})})}) })})})}) ])})})}) })})})})
})})})}) })})})}) ])})})}) })})})}) ])})})}) })})})}) ])})})}) })})})})

// Test that we can have the allowed # of square brackets.
// Note in bug SR-9220 this gave no error, but failed to parse in reasonable time.
// CHECK-NOT: error
let d = [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[ [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[ [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[ [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[ [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
1
]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder how quickly does this parse now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let l = [[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]
0.09 real 0.06 user 0.02 sys
let l = [[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]
0.11 real 0.07 user 0.02 sys
let l = [[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]
0.13 real 0.10 user 0.02 sys
let l = [[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]
0.18 real 0.14 user 0.03 sys
let l = [[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]
0.27 real 0.23 user 0.03 sys
let l = [[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]
0.47 real 0.42 user 0.04 sys
let l = [[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]
0.86 real 0.78 user 0.06 sys
let l = [[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]
1.69 real 1.50 user 0.11 sys
let l = [[[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]]
3.30 real 2.94 user 0.21 sys
let l = [[[[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]]]
6.47 real 5.85 user 0.39 sys
let l = [[[[[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]]]]
13.47 real 12.44 user 0.80 sys
let l = [[[[[[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]]]]]
26.47 real 24.59 user 1.61 sys
let l = [[[[[[[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]]]]]]
51.32 real 47.20 user 3.63 sys
let l = [[[[[[[[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]]]]]]]
103.45 real 95.47 user 7.31 sys
let l = [[[[[[[[[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
280.94 real 195.70 user 14.23 sys
let l = [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[ ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
463.93 real 386.87 user 27.48 sys

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, 463.93 is still pretty long. Maybe it's the best we can do.