Skip to content

Commit c8a6ef1

Browse files
committed
[Parse] add test about BOM + trivia
1 parent 861ee3a commit c8a6ef1

File tree

1 file changed

+200
-0
lines changed

1 file changed

+200
-0
lines changed

unittests/Parse/LexerTests.cpp

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,206 @@ TEST_F(LexerTest, ContentStartTokenIsStartOfLineUTF8BOM) {
260260
ASSERT_TRUE(Tok.isAtStartOfLine());
261261
}
262262

263+
/// Lexes a buffer that starts with the UTF-8 BOM bytes using
/// CommentRetentionMode::None and TriviaRetentionMode::WithoutTrivia.
/// The test expects exactly two tokens (the identifier and EOF), each with a
/// zero-length comment range and empty leading/trailing trivia.
TEST_F(LexerTest, BOMNoCommentNoTrivia) {
  // Byte layout of the buffer:
  //   [0,3)   BOM            [3,14)  "// comment\n"   [14,17) "aaa"
  //   [17,18) " "            [18,24) "//xx \n"        [24,31) "/* x */"
  const char *Text = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Text));

  // Shorthand for "location of byte offset N in the test buffer".
  const auto LocAt = [&](unsigned Offset) {
    return SM.getLocForOffset(BufID, Offset);
  };
  const syntax::Trivia NoTrivia{{}};

  Lexer TheLexer(Opts, SM, BufID, /*Diags=*/nullptr, /*InSILMode=*/false,
                 CommentRetentionMode::None,
                 TriviaRetentionMode::WithoutTrivia);

  Token Current;
  syntax::Trivia Leading, Trailing;

  // First token: the identifier at offset 14, with no comment attached.
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::identifier, Current.getKind());
  ASSERT_EQ("aaa", Current.getText());
  ASSERT_EQ(LocAt(14), Current.getLoc());
  ASSERT_EQ(LocAt(14), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);

  // Second token: EOF at the end of the buffer (offset 31).
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::eof, Current.getKind());
  ASSERT_EQ(LocAt(31), Current.getLoc());
  ASSERT_EQ(LocAt(31), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);
}
293+
294+
/// Lexes a buffer that starts with the UTF-8 BOM bytes using
/// CommentRetentionMode::ReturnAsTokens and TriviaRetentionMode::WithoutTrivia.
/// The test expects five tokens in order: comment, identifier, comment,
/// comment, EOF. Every token is expected to have a zero-length comment range
/// starting at its own location, and empty leading/trailing trivia.
TEST_F(LexerTest, BOMTokenCommentNoTrivia) {
  // Byte layout of the buffer:
  //   [0,3)   BOM            [3,14)  "// comment\n"   [14,17) "aaa"
  //   [17,18) " "            [18,24) "//xx \n"        [24,31) "/* x */"
  const char *Text = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Text));

  // Shorthand for "location of byte offset N in the test buffer".
  const auto LocAt = [&](unsigned Offset) {
    return SM.getLocForOffset(BufID, Offset);
  };
  const syntax::Trivia NoTrivia{{}};

  Lexer TheLexer(Opts, SM, BufID, /*Diags=*/nullptr, /*InSILMode=*/false,
                 CommentRetentionMode::ReturnAsTokens,
                 TriviaRetentionMode::WithoutTrivia);

  Token Current;
  syntax::Trivia Leading, Trailing;

  // Token 1: the line comment right after the BOM; its text includes the
  // terminating newline.
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::comment, Current.getKind());
  ASSERT_EQ("// comment\n", Current.getText());
  ASSERT_EQ(LocAt(3), Current.getLoc());
  ASSERT_EQ(LocAt(3), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);

  // Token 2: the identifier.
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::identifier, Current.getKind());
  ASSERT_EQ("aaa", Current.getText());
  ASSERT_EQ(LocAt(14), Current.getLoc());
  ASSERT_EQ(LocAt(14), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);

  // Token 3: the line comment that follows the identifier.
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::comment, Current.getKind());
  ASSERT_EQ("//xx \n", Current.getText());
  ASSERT_EQ(LocAt(18), Current.getLoc());
  ASSERT_EQ(LocAt(18), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);

  // Token 4: the block comment at the end of the buffer.
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::comment, Current.getKind());
  ASSERT_EQ("/* x */", Current.getText());
  ASSERT_EQ(LocAt(24), Current.getLoc());
  ASSERT_EQ(LocAt(24), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);

  // Token 5: EOF at the end of the buffer (offset 31).
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::eof, Current.getKind());
  ASSERT_EQ(LocAt(31), Current.getLoc());
  ASSERT_EQ(LocAt(31), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);
}
351+
352+
/// Lexes a buffer that starts with the UTF-8 BOM bytes using
/// CommentRetentionMode::AttachToNextToken and
/// TriviaRetentionMode::WithoutTrivia. The test expects two tokens (the
/// identifier and EOF) whose comment ranges cover the comments preceding
/// them, while leading/trailing trivia stay empty.
TEST_F(LexerTest, BOMAttachCommentNoTrivia) {
  // Byte layout of the buffer:
  //   [0,3)   BOM            [3,14)  "// comment\n"   [14,17) "aaa"
  //   [17,18) " "            [18,24) "//xx \n"        [24,31) "/* x */"
  const char *Text = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Text));

  // Shorthand for "location of byte offset N in the test buffer".
  const auto LocAt = [&](unsigned Offset) {
    return SM.getLocForOffset(BufID, Offset);
  };
  const syntax::Trivia NoTrivia{{}};

  Lexer TheLexer(Opts, SM, BufID, /*Diags=*/nullptr, /*InSILMode=*/false,
                 CommentRetentionMode::AttachToNextToken,
                 TriviaRetentionMode::WithoutTrivia);

  Token Current;
  syntax::Trivia Leading, Trailing;

  // Identifier at offset 14. Its comment range is expected to start right
  // after the BOM (offset 3) and span 10 bytes, i.e. "// comment" without
  // the newline.
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::identifier, Current.getKind());
  ASSERT_EQ("aaa", Current.getText());
  ASSERT_EQ(LocAt(14), Current.getLoc());
  ASSERT_EQ(LocAt(3), Current.getCommentRange().getStart());
  ASSERT_EQ(10u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);

  // EOF at offset 31. Its comment range is expected to start at offset 18
  // and span 13 bytes, i.e. "//xx \n/* x */".
  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::eof, Current.getKind());
  ASSERT_EQ(LocAt(31), Current.getLoc());
  ASSERT_EQ(LocAt(18), Current.getCommentRange().getStart());
  ASSERT_EQ(13u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(NoTrivia, Leading);
  ASSERT_EQ(NoTrivia, Trailing);
}
382+
383+
/// Lexes a buffer that starts with the UTF-8 BOM bytes using
/// CommentRetentionMode::None and TriviaRetentionMode::WithTrivia.
/// The test expects two tokens (the identifier and EOF) with zero-length
/// comment ranges, and expects the comments, newlines and the space to be
/// reported as trivia pieces instead.
TEST_F(LexerTest, BOMNoCommentTrivia) {
  // Byte layout of the buffer:
  //   [0,3)   BOM            [3,14)  "// comment\n"   [14,17) "aaa"
  //   [17,18) " "            [18,24) "//xx \n"        [24,31) "/* x */"
  const char *Text = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Text));

  // Shorthand for "location of byte offset N in the test buffer".
  const auto LocAt = [&](unsigned Offset) {
    return SM.getLocForOffset(BufID, Offset);
  };

  Lexer TheLexer(Opts, SM, BufID, /*Diags=*/nullptr, /*InSILMode=*/false,
                 CommentRetentionMode::None,
                 TriviaRetentionMode::WithTrivia);

  Token Current;
  syntax::Trivia Leading, Trailing;

  // Identifier at offset 14: the first comment and its newline are expected
  // as leading trivia, the single following space as trailing trivia.
  const syntax::Trivia IdentLeading{{
    syntax::TriviaPiece::lineComment("// comment"),
    syntax::TriviaPiece::newlines(1)
  }};
  const syntax::Trivia IdentTrailing{{
    syntax::TriviaPiece::spaces(1)
  }};

  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::identifier, Current.getKind());
  ASSERT_EQ("aaa", Current.getText());
  ASSERT_EQ(LocAt(14), Current.getLoc());
  ASSERT_EQ(LocAt(14), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(IdentLeading, Leading);
  ASSERT_EQ(IdentTrailing, Trailing);

  // EOF at offset 31: the remaining line comment, newline and block comment
  // are expected as leading trivia; trailing trivia is empty.
  const syntax::Trivia EofLeading{{
    syntax::TriviaPiece::lineComment("//xx "),
    syntax::TriviaPiece::newlines(1),
    syntax::TriviaPiece::blockComment("/* x */")
  }};
  const syntax::Trivia EofTrailing{{}};

  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::eof, Current.getKind());
  ASSERT_EQ(LocAt(31), Current.getLoc());
  ASSERT_EQ(LocAt(31), Current.getCommentRange().getStart());
  ASSERT_EQ(0u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(EofLeading, Leading);
  ASSERT_EQ(EofTrailing, Trailing);
}
422+
423+
/// Lexes a buffer that starts with the UTF-8 BOM bytes using
/// CommentRetentionMode::AttachToNextToken and TriviaRetentionMode::WithTrivia.
/// The test expects two tokens (the identifier and EOF) whose comment ranges
/// cover the preceding comments, and additionally expects those comments,
/// the newlines and the space to be reported as trivia pieces.
TEST_F(LexerTest, BOMAttachCommentTrivia) {
  // Byte layout of the buffer:
  //   [0,3)   BOM            [3,14)  "// comment\n"   [14,17) "aaa"
  //   [17,18) " "            [18,24) "//xx \n"        [24,31) "/* x */"
  const char *Text = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Text));

  // Shorthand for "location of byte offset N in the test buffer".
  const auto LocAt = [&](unsigned Offset) {
    return SM.getLocForOffset(BufID, Offset);
  };

  Lexer TheLexer(Opts, SM, BufID, /*Diags=*/nullptr, /*InSILMode=*/false,
                 CommentRetentionMode::AttachToNextToken,
                 TriviaRetentionMode::WithTrivia);

  Token Current;
  syntax::Trivia Leading, Trailing;

  // Identifier at offset 14: comment range is expected to start at offset 3
  // and span 10 bytes ("// comment" without the newline). The same comment
  // plus its newline is expected as leading trivia, and the following space
  // as trailing trivia.
  const syntax::Trivia IdentLeading{{
    syntax::TriviaPiece::lineComment("// comment"),
    syntax::TriviaPiece::newlines(1)
  }};
  const syntax::Trivia IdentTrailing{{
    syntax::TriviaPiece::spaces(1)
  }};

  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::identifier, Current.getKind());
  ASSERT_EQ("aaa", Current.getText());
  ASSERT_EQ(LocAt(14), Current.getLoc());
  ASSERT_EQ(LocAt(3), Current.getCommentRange().getStart());
  ASSERT_EQ(10u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(IdentLeading, Leading);
  ASSERT_EQ(IdentTrailing, Trailing);

  // EOF at offset 31: comment range is expected to start at offset 18 and
  // span 13 bytes ("//xx \n/* x */"). The same content is expected as
  // leading trivia; trailing trivia is empty.
  const syntax::Trivia EofLeading{{
    syntax::TriviaPiece::lineComment("//xx "),
    syntax::TriviaPiece::newlines(1),
    syntax::TriviaPiece::blockComment("/* x */")
  }};
  const syntax::Trivia EofTrailing{{}};

  TheLexer.lex(Current, Leading, Trailing);
  ASSERT_EQ(tok::eof, Current.getKind());
  ASSERT_EQ(LocAt(31), Current.getLoc());
  ASSERT_EQ(LocAt(18), Current.getCommentRange().getStart());
  ASSERT_EQ(13u, Current.getCommentRange().getByteLength());
  ASSERT_EQ(EofLeading, Leading);
  ASSERT_EQ(EofTrailing, Trailing);
}
462+
263463
TEST_F(LexerTest, RestoreBasic) {
264464
const char *Source = "aaa \t\0 bbb ccc";
265465

0 commit comments

Comments
 (0)