Skip to content

Commit 6181da4

Browse files
committed
clang-format: [JS] nested and tagged template strings.
JavaScript template strings can be nested arbitrarily:

    foo = `text ${es.map(e => { return `<${e}>`; })} text`;

This change lexes nested template strings using a stack of lexer states to correctly switch back to template string lexing on closing braces.

Also, reuse the same stack for the token-stashed logic.

Reviewers: djasper

Subscribers: cfe-commits, klimek

Differential Revision: https://reviews.llvm.org/D22431

llvm-svn: 279727
1 parent 86ce267 commit 6181da4

File tree

4 files changed

+84
-24
lines changed

4 files changed

+84
-24
lines changed

clang/lib/Format/FormatTokenLexer.cpp

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,11 @@ namespace format {
2626
FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
2727
const FormatStyle &Style,
2828
encoding::Encoding Encoding)
29-
: FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
30-
LessStashed(false), Column(0), TrailingWhitespace(0),
31-
SourceMgr(SourceMgr), ID(ID), Style(Style),
32-
IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
33-
Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
34-
MacroBlockBeginRegex(Style.MacroBlockBegin),
29+
: FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
30+
Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
31+
Style(Style), IdentTable(getFormattingLangOpts(Style)),
32+
Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
33+
FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
3534
MacroBlockEndRegex(Style.MacroBlockEnd) {
3635
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
3736
getFormattingLangOpts(Style)));
@@ -49,7 +48,7 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
4948
Tokens.push_back(getNextToken());
5049
if (Style.Language == FormatStyle::LK_JavaScript) {
5150
tryParseJSRegexLiteral();
52-
tryParseTemplateString();
51+
handleTemplateStrings();
5352
}
5453
tryMergePreviousTokens();
5554
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
@@ -228,17 +227,42 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
228227
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
229228
}
230229

231-
void FormatTokenLexer::tryParseTemplateString() {
230+
void FormatTokenLexer::handleTemplateStrings() {
232231
FormatToken *BacktickToken = Tokens.back();
233-
if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
232+
233+
if (BacktickToken->is(tok::l_brace)) {
234+
StateStack.push(LexerState::NORMAL);
234235
return;
236+
}
237+
if (BacktickToken->is(tok::r_brace)) {
238+
StateStack.pop();
239+
if (StateStack.top() != LexerState::TEMPLATE_STRING)
240+
return;
241+
// If back in TEMPLATE_STRING, fall through and continue parsing the template string.
242+
} else if (BacktickToken->is(tok::unknown) &&
243+
BacktickToken->TokenText == "`") {
244+
StateStack.push(LexerState::TEMPLATE_STRING);
245+
} else {
246+
return; // Not actually a template
247+
}
235248

236249
// 'Manually' lex ahead in the current file buffer.
237250
const char *Offset = Lex->getBufferLocation();
238251
const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
239-
for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
240-
if (*Offset == '\\')
252+
for (; Offset != Lex->getBuffer().end(); ++Offset) {
253+
if (Offset[0] == '`') {
254+
StateStack.pop();
255+
break;
256+
}
257+
if (Offset[0] == '\\') {
241258
++Offset; // Skip the escaped character.
259+
} else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
260+
Offset[1] == '{') {
261+
// '${' introduces an expression interpolation in the template string.
262+
StateStack.push(LexerState::NORMAL);
263+
++Offset;
264+
break;
265+
}
242266
}
243267

244268
StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
@@ -262,7 +286,10 @@ void FormatTokenLexer::tryParseTemplateString() {
262286
Style.TabWidth, Encoding);
263287
}
264288

265-
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
289+
SourceLocation loc = Offset < Lex->getBuffer().end()
290+
? Lex->getSourceLocation(Offset + 1)
291+
: SourceMgr.getLocForEndOfFile(ID);
292+
resetLexer(SourceMgr.getFileOffset(loc));
266293
}
267294

268295
bool FormatTokenLexer::tryMerge_TMacro() {
@@ -384,12 +411,8 @@ FormatToken *FormatTokenLexer::getStashedToken() {
384411
}
385412

386413
FormatToken *FormatTokenLexer::getNextToken() {
387-
if (GreaterStashed) {
388-
GreaterStashed = false;
389-
return getStashedToken();
390-
}
391-
if (LessStashed) {
392-
LessStashed = false;
414+
if (StateStack.top() == LexerState::TOKEN_STASHED) {
415+
StateStack.pop();
393416
return getStashedToken();
394417
}
395418

@@ -500,11 +523,11 @@ FormatToken *FormatTokenLexer::getNextToken() {
500523
} else if (FormatTok->Tok.is(tok::greatergreater)) {
501524
FormatTok->Tok.setKind(tok::greater);
502525
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
503-
GreaterStashed = true;
526+
StateStack.push(LexerState::TOKEN_STASHED);
504527
} else if (FormatTok->Tok.is(tok::lessless)) {
505528
FormatTok->Tok.setKind(tok::less);
506529
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
507-
LessStashed = true;
530+
StateStack.push(LexerState::TOKEN_STASHED);
508531
}
509532

510533
// Now FormatTok is the next non-whitespace token.

clang/lib/Format/FormatTokenLexer.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,17 @@
2323
#include "clang/Format/Format.h"
2424
#include "llvm/Support/Regex.h"
2525

26+
#include <stack>
27+
2628
namespace clang {
2729
namespace format {
2830

31+
enum LexerState {
32+
NORMAL,
33+
TEMPLATE_STRING,
34+
TOKEN_STASHED,
35+
};
36+
2937
class FormatTokenLexer {
3038
public:
3139
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
@@ -53,7 +61,16 @@ class FormatTokenLexer {
5361
// its text if successful.
5462
void tryParseJSRegexLiteral();
5563

56-
void tryParseTemplateString();
64+
// Handles JavaScript template strings.
65+
//
66+
// JavaScript template strings use backticks ('`') as delimiters, and allow
67+
// embedding expressions nested in ${expr-here}. Template strings can be
68+
// nested recursively, i.e. expressions can contain template strings in turn.
69+
//
70+
// The code below parses starting from a backtick, up to a closing backtick or
71+
// an opening ${. It also maintains a stack of lexing contexts to handle
72+
// nested template parts by balancing curly braces.
73+
void handleTemplateStrings();
5774

5875
bool tryMerge_TMacro();
5976

@@ -65,7 +82,7 @@ class FormatTokenLexer {
6582

6683
FormatToken *FormatTok;
6784
bool IsFirstToken;
68-
bool GreaterStashed, LessStashed;
85+
std::stack<LexerState> StateStack;
6986
unsigned Column;
7087
unsigned TrailingWhitespace;
7188
std::unique_ptr<Lexer> Lex;

clang/lib/Format/TokenAnnotator.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@ class AnnotatingParser {
858858
if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
859859
TT_FunctionLBrace, TT_ImplicitStringLiteral,
860860
TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
861-
TT_RegexLiteral))
861+
TT_RegexLiteral, TT_TemplateString))
862862
CurrentToken->Type = TT_Unknown;
863863
CurrentToken->Role.reset();
864864
CurrentToken->MatchingParen = nullptr;
@@ -1816,6 +1816,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
18161816
return 100;
18171817
if (Left.is(TT_JsTypeColon))
18181818
return 35;
1819+
if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
1820+
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
1821+
return 100;
18191822
}
18201823

18211824
if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
@@ -2114,6 +2117,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
21142117
} else if (Style.Language == FormatStyle::LK_JavaScript) {
21152118
if (Left.is(TT_JsFatArrow))
21162119
return true;
2120+
if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2121+
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2122+
return false;
2123+
if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
2124+
return false;
21172125
if (Right.is(tok::star) &&
21182126
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
21192127
return false;

clang/unittests/Format/FormatTestJS.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1122,7 +1122,7 @@ TEST_F(FormatTestJS, ImportWrapping) {
11221122
TEST_F(FormatTestJS, TemplateStrings) {
11231123
// Keeps any whitespace/indentation within the template string.
11241124
verifyFormat("var x = `hello\n"
1125-
" ${ name }\n"
1125+
" ${name}\n"
11261126
" !`;",
11271127
"var x = `hello\n"
11281128
" ${ name }\n"
@@ -1206,6 +1206,18 @@ TEST_F(FormatTestJS, TemplateStrings) {
12061206
"var y;",
12071207
"var x = ` \\` a`;\n"
12081208
"var y;");
1209+
// Escaped dollar.
1210+
verifyFormat("var x = ` \\${foo}`;\n");
1211+
}
1212+
1213+
TEST_F(FormatTestJS, NestedTemplateStrings) {
1214+
verifyFormat(
1215+
"var x = `<ul>${xs.map(x => `<li>${x}</li>`).join('\\n')}</ul>`;");
1216+
verifyFormat("var x = `he${({text: 'll'}.text)}o`;");
1217+
}
1218+
1219+
TEST_F(FormatTestJS, TaggedTemplateStrings) {
1220+
verifyFormat("var x = html`<ul>`;");
12091221
}
12101222

12111223
TEST_F(FormatTestJS, CastSyntax) {

0 commit comments

Comments
 (0)