Skip to content

Commit 461c764

Browse files
omochinkcsgexi
authored and committed
[Parse] make tokenizeXxx into common
1 parent 4794ead commit 461c764

File tree

1 file changed

+92
-66
lines changed

1 file changed

+92
-66
lines changed

lib/Parse/Parser.cpp

Lines changed: 92 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,71 @@
3535
#include "llvm/ADT/PointerUnion.h"
3636
#include "llvm/ADT/Twine.h"
3737

38+
static void getStringPartTokens(const swift::Token &Tok,
39+
const swift::LangOptions &LangOpts,
40+
const swift::SourceManager &SM, int BufID,
41+
std::vector<swift::Token> &Toks);
42+
43+
namespace swift {
/// Shared tokenization loop: lexes buffer \p BufferID with a Lexer built over
/// [\p Offset, \p EndOffset) and hands every resulting token to \p DestFunc.
///
/// \param LangOpts, SM  Forwarded to the Lexer and to getStringPartTokens.
/// \param BufferID  Buffer to lex.
/// \param Offset, EndOffset  Range passed to the Lexer. If both are 0,
///        EndOffset is replaced by the byte length of the buffer's range.
/// \param RetainComments, TriviaRetention  Forwarded to the Lexer unchanged.
/// \param TokenizeInterpolatedString  When true, each string_literal token is
///        expanded via getStringPartTokens and the parts are delivered
///        instead of the literal itself.
/// \param SplitTokens  Tokens that override whatever the lexer produces at
///        the same source location (see the loop body).
/// \param DestFunc  Callable invoked as
///        DestFunc(Token, LeadingTrivia, TrailingTrivia) once per delivered
///        token. The eof token that ends the loop is delivered as well, so
///        callers that do not want it must drop it themselves.
44+
template <typename DF>
45+
void tokenize(const LangOptions &LangOpts, const SourceManager &SM,
46+
unsigned BufferID, unsigned Offset, unsigned EndOffset,
47+
CommentRetentionMode RetainComments,
48+
TriviaRetentionMode TriviaRetention,
49+
bool TokenizeInterpolatedString, ArrayRef<Token> SplitTokens,
50+
DF &&DestFunc) {
// Interpolated-string parts are delivered with empty trivia below, so the
// WithTrivia + TokenizeInterpolatedString combination is rejected up front.
51+
assert((TriviaRetention != TriviaRetentionMode::WithTrivia ||
52+
!TokenizeInterpolatedString) &&
53+
"string interpolation with trivia is not implemented yet");
54+
// (0, 0) is treated as "no explicit range": lex through the end of the
// buffer.
55+
if (Offset == 0 && EndOffset == 0)
56+
EndOffset = SM.getRangeForBuffer(BufferID).getByteLength();
57+
58+
Lexer L(LangOpts, SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false,
59+
RetainComments, TriviaRetention, Offset, EndOffset);
60+
// Orders tokens purely by source location; two tokens at the same location
// therefore compare equivalent in the set below regardless of kind or text.
61+
auto TokComp = [&](const Token &A, const Token &B) {
62+
return SM.isBeforeInBuffer(A.getLoc(), B.getLoc());
63+
};
64+
// Location-keyed lookup table built from SplitTokens.
65+
std::set<Token, decltype(TokComp)> ResetTokens(TokComp);
66+
for (auto C = SplitTokens.begin(), E = SplitTokens.end(); C != E; ++C) {
67+
ResetTokens.insert(*C);
68+
}
69+
70+
Token Tok;
71+
syntax::Trivia LeadingTrivia, TrailingTrivia;
72+
do {
73+
L.lex(Tok, LeadingTrivia, TrailingTrivia);
74+
75+
// If the token has the same location as a reset location,
76+
// reset the token stream
77+
auto F = ResetTokens.find(Tok);
78+
if (F != ResetTokens.end()) {
79+
assert(F->isNot(tok::string_literal));
80+
// Deliver the split token in place of the lexed one, with empty trivia.
81+
DestFunc(*F, syntax::Trivia(), syntax::Trivia());
82+
// Resume lexing at the location immediately after the split token.
83+
auto NewState = L.getStateForBeginningOfTokenLoc(
84+
F->getLoc().getAdvancedLoc(F->getLength()));
85+
L.restoreState(NewState);
// NOTE: `continue` in a do-while still evaluates the loop condition, and
// that condition inspects the lexed Tok, not the split token *F.
86+
continue;
87+
}
88+
89+
if (Tok.is(tok::string_literal) && TokenizeInterpolatedString) {
// Replace the literal by its parts; each part is delivered with empty
// trivia.
90+
std::vector<Token> StrTokens;
91+
getStringPartTokens(Tok, LangOpts, SM, BufferID, StrTokens);
92+
for (auto &StrTok : StrTokens) {
93+
DestFunc(StrTok, syntax::Trivia(), syntax::Trivia());
94+
}
95+
} else {
96+
DestFunc(Tok, LeadingTrivia, TrailingTrivia);
97+
}
98+
// The eof token has already been passed to DestFunc when the loop exits.
99+
} while (Tok.getKind() != tok::eof);
100+
}
101+
} // namespace swift
102+
38103
using namespace swift;
39104
using namespace swift::syntax;
40105

@@ -217,82 +282,43 @@ std::vector<Token> swift::tokenize(const LangOptions &LangOpts,
217282
bool KeepComments,
218283
bool TokenizeInterpolatedString,
219284
ArrayRef<Token> SplitTokens) {
220-
if (Offset == 0 && EndOffset == 0)
221-
EndOffset = SM.getRangeForBuffer(BufferID).getByteLength();
222-
223-
Lexer L(LangOpts, SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false,
224-
KeepComments ? CommentRetentionMode::ReturnAsTokens
225-
: CommentRetentionMode::AttachToNextToken,
226-
TriviaRetentionMode::WithoutTrivia,
227-
Offset, EndOffset);
228-
229-
auto TokComp = [&] (const Token &A, const Token &B) {
230-
return SM.isBeforeInBuffer(A.getLoc(), B.getLoc());
231-
};
232-
233-
std::set<Token, decltype(TokComp)> ResetTokens(TokComp);
234-
for (auto C = SplitTokens.begin(), E = SplitTokens.end(); C != E; ++C) {
235-
ResetTokens.insert(*C);
236-
}
237-
238285
std::vector<Token> Tokens;
239-
Trivia LeadingTrivia, TrailingTrivia;
240-
do {
241-
Tokens.emplace_back();
242-
L.lex(Tokens.back(), LeadingTrivia, TrailingTrivia);
243286

244-
// If the token has the same location as a reset location,
245-
// reset the token stream
246-
auto F = ResetTokens.find(Tokens.back());
247-
if (F != ResetTokens.end()) {
248-
Tokens.back() = *F;
249-
assert(Tokens.back().isNot(tok::string_literal));
250-
251-
auto NewState = L.getStateForBeginningOfTokenLoc(
252-
F->getLoc().getAdvancedLoc(F->getLength()));
253-
L.restoreState(NewState);
254-
continue;
255-
}
287+
tokenize(LangOpts, SM, BufferID, Offset, EndOffset,
288+
KeepComments ? CommentRetentionMode::ReturnAsTokens
289+
: CommentRetentionMode::AttachToNextToken,
290+
TriviaRetentionMode::WithoutTrivia, TokenizeInterpolatedString,
291+
SplitTokens,
292+
[&](const Token &Tok, const Trivia &LeadingTrivia,
293+
const Trivia &TrailingTrivia) { Tokens.push_back(Tok); });
256294

257-
if (Tokens.back().is(tok::string_literal) && TokenizeInterpolatedString) {
258-
Token StrTok = Tokens.back();
259-
Tokens.pop_back();
260-
getStringPartTokens(StrTok, LangOpts, SM, BufferID, Tokens);
261-
}
262-
} while (Tokens.back().isNot(tok::eof));
295+
assert(Tokens.back().is(tok::eof));
263296
Tokens.pop_back(); // Remove EOF.
264297
return Tokens;
265298
}
266299

267-
// TODO: Refactor into common implementation with swift::tokenize.
268-
std::vector<std::pair<RC<syntax::RawTokenSyntax>,
269-
syntax::AbsolutePosition>>
270-
swift::tokenizeWithTrivia(const LangOptions &LangOpts,
271-
const SourceManager &SM,
272-
unsigned BufferID,
273-
unsigned Offset,
300+
std::vector<std::pair<RC<syntax::RawTokenSyntax>, syntax::AbsolutePosition>>
301+
swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
302+
unsigned BufferID, unsigned Offset,
274303
unsigned EndOffset) {
275-
if (Offset == 0 && EndOffset == 0)
276-
EndOffset = SM.getRangeForBuffer(BufferID).getByteLength();
277-
278-
Lexer L(LangOpts, SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false,
279-
CommentRetentionMode::AttachToNextToken,
280-
TriviaRetentionMode::WithTrivia,
281-
Offset, EndOffset);
282-
std::vector<std::pair<RC<syntax::RawTokenSyntax>,
283-
syntax::AbsolutePosition>> Tokens;
304+
std::vector<std::pair<RC<syntax::RawTokenSyntax>, syntax::AbsolutePosition>>
305+
Tokens;
284306
syntax::AbsolutePosition RunningPos;
285-
Token Tok;
286-
Trivia LeadingTrivia, TrailingTrivia;
287-
do {
288-
L.lex(Tok, LeadingTrivia, TrailingTrivia);
289-
auto ThisToken = RawTokenSyntax::make(Tok.getKind(), Tok.getText(),
290-
SourcePresence::Present, LeadingTrivia,
291-
TrailingTrivia);
292307

293-
auto ThisTokenPos = ThisToken->accumulateAbsolutePosition(RunningPos);
294-
Tokens.push_back({ThisToken, ThisTokenPos});
295-
} while (Tokens.back().first->isNot(tok::eof));
308+
tokenize(
309+
LangOpts, SM, BufferID, Offset, EndOffset,
310+
CommentRetentionMode::AttachToNextToken, TriviaRetentionMode::WithTrivia,
311+
/*TokenizeInterpolatedString=*/false,
312+
/*SplitTokens=*/ArrayRef<Token>(),
313+
[&](const Token &Tok, const Trivia &LeadingTrivia,
314+
const Trivia &TrailingTrivia) {
315+
auto ThisToken = RawTokenSyntax::make(Tok.getKind(), Tok.getText(),
316+
SourcePresence::Present,
317+
LeadingTrivia, TrailingTrivia);
318+
319+
auto ThisTokenPos = ThisToken->accumulateAbsolutePosition(RunningPos);
320+
Tokens.push_back({ThisToken, ThisTokenPos});
321+
});
296322

297323
return Tokens;
298324
}

0 commit comments

Comments
 (0)