|
35 | 35 | #include "llvm/ADT/PointerUnion.h"
|
36 | 36 | #include "llvm/ADT/Twine.h"
|
37 | 37 |
|
| 38 | +static void getStringPartTokens(const swift::Token &Tok, |
| 39 | + const swift::LangOptions &LangOpts, |
| 40 | + const swift::SourceManager &SM, int BufID, |
| 41 | + std::vector<swift::Token> &Toks); |
| 42 | + |
| 43 | +namespace swift { |
| 44 | +template <typename DF> |
| 45 | +void tokenize(const LangOptions &LangOpts, const SourceManager &SM, |
| 46 | + unsigned BufferID, unsigned Offset, unsigned EndOffset, |
| 47 | + CommentRetentionMode RetainComments, |
| 48 | + TriviaRetentionMode TriviaRetention, |
| 49 | + bool TokenizeInterpolatedString, ArrayRef<Token> SplitTokens, |
| 50 | + DF &&DestFunc) { |
| 51 | + assert((TriviaRetention != TriviaRetentionMode::WithTrivia || |
| 52 | + !TokenizeInterpolatedString) && |
| 53 | + "string interpolation with trivia is not implemented yet"); |
| 54 | + |
| 55 | + if (Offset == 0 && EndOffset == 0) |
| 56 | + EndOffset = SM.getRangeForBuffer(BufferID).getByteLength(); |
| 57 | + |
| 58 | + Lexer L(LangOpts, SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false, |
| 59 | + RetainComments, TriviaRetention, Offset, EndOffset); |
| 60 | + |
| 61 | + auto TokComp = [&](const Token &A, const Token &B) { |
| 62 | + return SM.isBeforeInBuffer(A.getLoc(), B.getLoc()); |
| 63 | + }; |
| 64 | + |
| 65 | + std::set<Token, decltype(TokComp)> ResetTokens(TokComp); |
| 66 | + for (auto C = SplitTokens.begin(), E = SplitTokens.end(); C != E; ++C) { |
| 67 | + ResetTokens.insert(*C); |
| 68 | + } |
| 69 | + |
| 70 | + Token Tok; |
| 71 | + syntax::Trivia LeadingTrivia, TrailingTrivia; |
| 72 | + do { |
| 73 | + L.lex(Tok, LeadingTrivia, TrailingTrivia); |
| 74 | + |
| 75 | + // If the token has the same location as a reset location, |
| 76 | + // reset the token stream |
| 77 | + auto F = ResetTokens.find(Tok); |
| 78 | + if (F != ResetTokens.end()) { |
| 79 | + assert(F->isNot(tok::string_literal)); |
| 80 | + |
| 81 | + DestFunc(*F, syntax::Trivia(), syntax::Trivia()); |
| 82 | + |
| 83 | + auto NewState = L.getStateForBeginningOfTokenLoc( |
| 84 | + F->getLoc().getAdvancedLoc(F->getLength())); |
| 85 | + L.restoreState(NewState); |
| 86 | + continue; |
| 87 | + } |
| 88 | + |
| 89 | + if (Tok.is(tok::string_literal) && TokenizeInterpolatedString) { |
| 90 | + std::vector<Token> StrTokens; |
| 91 | + getStringPartTokens(Tok, LangOpts, SM, BufferID, StrTokens); |
| 92 | + for (auto &StrTok : StrTokens) { |
| 93 | + DestFunc(StrTok, syntax::Trivia(), syntax::Trivia()); |
| 94 | + } |
| 95 | + } else { |
| 96 | + DestFunc(Tok, LeadingTrivia, TrailingTrivia); |
| 97 | + } |
| 98 | + |
| 99 | + } while (Tok.getKind() != tok::eof); |
| 100 | +} |
| 101 | +} // namespace swift |
| 102 | + |
38 | 103 | using namespace swift;
|
39 | 104 | using namespace swift::syntax;
|
40 | 105 |
|
@@ -217,82 +282,43 @@ std::vector<Token> swift::tokenize(const LangOptions &LangOpts,
|
217 | 282 | bool KeepComments,
|
218 | 283 | bool TokenizeInterpolatedString,
|
219 | 284 | ArrayRef<Token> SplitTokens) {
|
220 |
| - if (Offset == 0 && EndOffset == 0) |
221 |
| - EndOffset = SM.getRangeForBuffer(BufferID).getByteLength(); |
222 |
| - |
223 |
| - Lexer L(LangOpts, SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false, |
224 |
| - KeepComments ? CommentRetentionMode::ReturnAsTokens |
225 |
| - : CommentRetentionMode::AttachToNextToken, |
226 |
| - TriviaRetentionMode::WithoutTrivia, |
227 |
| - Offset, EndOffset); |
228 |
| - |
229 |
| - auto TokComp = [&] (const Token &A, const Token &B) { |
230 |
| - return SM.isBeforeInBuffer(A.getLoc(), B.getLoc()); |
231 |
| - }; |
232 |
| - |
233 |
| - std::set<Token, decltype(TokComp)> ResetTokens(TokComp); |
234 |
| - for (auto C = SplitTokens.begin(), E = SplitTokens.end(); C != E; ++C) { |
235 |
| - ResetTokens.insert(*C); |
236 |
| - } |
237 |
| - |
238 | 285 | std::vector<Token> Tokens;
|
239 |
| - Trivia LeadingTrivia, TrailingTrivia; |
240 |
| - do { |
241 |
| - Tokens.emplace_back(); |
242 |
| - L.lex(Tokens.back(), LeadingTrivia, TrailingTrivia); |
243 | 286 |
|
244 |
| - // If the token has the same location as a reset location, |
245 |
| - // reset the token stream |
246 |
| - auto F = ResetTokens.find(Tokens.back()); |
247 |
| - if (F != ResetTokens.end()) { |
248 |
| - Tokens.back() = *F; |
249 |
| - assert(Tokens.back().isNot(tok::string_literal)); |
250 |
| - |
251 |
| - auto NewState = L.getStateForBeginningOfTokenLoc( |
252 |
| - F->getLoc().getAdvancedLoc(F->getLength())); |
253 |
| - L.restoreState(NewState); |
254 |
| - continue; |
255 |
| - } |
| 287 | + tokenize(LangOpts, SM, BufferID, Offset, EndOffset, |
| 288 | + KeepComments ? CommentRetentionMode::ReturnAsTokens |
| 289 | + : CommentRetentionMode::AttachToNextToken, |
| 290 | + TriviaRetentionMode::WithoutTrivia, TokenizeInterpolatedString, |
| 291 | + SplitTokens, |
| 292 | + [&](const Token &Tok, const Trivia &LeadingTrivia, |
| 293 | + const Trivia &TrailingTrivia) { Tokens.push_back(Tok); }); |
256 | 294 |
|
257 |
| - if (Tokens.back().is(tok::string_literal) && TokenizeInterpolatedString) { |
258 |
| - Token StrTok = Tokens.back(); |
259 |
| - Tokens.pop_back(); |
260 |
| - getStringPartTokens(StrTok, LangOpts, SM, BufferID, Tokens); |
261 |
| - } |
262 |
| - } while (Tokens.back().isNot(tok::eof)); |
| 295 | + assert(Tokens.back().is(tok::eof)); |
263 | 296 | Tokens.pop_back(); // Remove EOF.
|
264 | 297 | return Tokens;
|
265 | 298 | }
|
266 | 299 |
|
267 |
| -// TODO: Refactor into common implementation with swift::tokenize. |
268 |
| -std::vector<std::pair<RC<syntax::RawTokenSyntax>, |
269 |
| - syntax::AbsolutePosition>> |
270 |
| -swift::tokenizeWithTrivia(const LangOptions &LangOpts, |
271 |
| - const SourceManager &SM, |
272 |
| - unsigned BufferID, |
273 |
| - unsigned Offset, |
| 300 | +std::vector<std::pair<RC<syntax::RawTokenSyntax>, syntax::AbsolutePosition>> |
| 301 | +swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM, |
| 302 | + unsigned BufferID, unsigned Offset, |
274 | 303 | unsigned EndOffset) {
|
275 |
| - if (Offset == 0 && EndOffset == 0) |
276 |
| - EndOffset = SM.getRangeForBuffer(BufferID).getByteLength(); |
277 |
| - |
278 |
| - Lexer L(LangOpts, SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false, |
279 |
| - CommentRetentionMode::AttachToNextToken, |
280 |
| - TriviaRetentionMode::WithTrivia, |
281 |
| - Offset, EndOffset); |
282 |
| - std::vector<std::pair<RC<syntax::RawTokenSyntax>, |
283 |
| - syntax::AbsolutePosition>> Tokens; |
| 304 | + std::vector<std::pair<RC<syntax::RawTokenSyntax>, syntax::AbsolutePosition>> |
| 305 | + Tokens; |
284 | 306 | syntax::AbsolutePosition RunningPos;
|
285 |
| - Token Tok; |
286 |
| - Trivia LeadingTrivia, TrailingTrivia; |
287 |
| - do { |
288 |
| - L.lex(Tok, LeadingTrivia, TrailingTrivia); |
289 |
| - auto ThisToken = RawTokenSyntax::make(Tok.getKind(), Tok.getText(), |
290 |
| - SourcePresence::Present, LeadingTrivia, |
291 |
| - TrailingTrivia); |
292 | 307 |
|
293 |
| - auto ThisTokenPos = ThisToken->accumulateAbsolutePosition(RunningPos); |
294 |
| - Tokens.push_back({ThisToken, ThisTokenPos}); |
295 |
| - } while (Tokens.back().first->isNot(tok::eof)); |
| 308 | + tokenize( |
| 309 | + LangOpts, SM, BufferID, Offset, EndOffset, |
| 310 | + CommentRetentionMode::AttachToNextToken, TriviaRetentionMode::WithTrivia, |
| 311 | + /*TokenizeInterpolatedString=*/false, |
| 312 | + /*SplitTokens=*/ArrayRef<Token>(), |
| 313 | + [&](const Token &Tok, const Trivia &LeadingTrivia, |
| 314 | + const Trivia &TrailingTrivia) { |
| 315 | + auto ThisToken = RawTokenSyntax::make(Tok.getKind(), Tok.getText(), |
| 316 | + SourcePresence::Present, |
| 317 | + LeadingTrivia, TrailingTrivia); |
| 318 | + |
| 319 | + auto ThisTokenPos = ThisToken->accumulateAbsolutePosition(RunningPos); |
| 320 | + Tokens.push_back({ThisToken, ThisTokenPos}); |
| 321 | + }); |
296 | 322 |
|
297 | 323 | return Tokens;
|
298 | 324 | }
|
|
0 commit comments