Skip to content

Commit 9de65db

Browse files
petebacondarwin and dylhunn
authored and committed
fix(compiler): should not break a text token on a non-valid start tag (angular#42605)
Previously the lexer would break out of consuming a text token whenever it encountered a `<` character. If the next characters did not indicate an HTML syntax item, such as a tag or comment, it would then start a new text token. These consecutive text tokens were then merged into each other in a post-tokenization step. As of the commit before this one, interpolation no longer leaks across text tokens, so the approach described above for handling `<` characters that appear in text is no longer adequate. This change ensures that the lexer only breaks out of a text token if the next characters indicate a valid HTML tag, comment, CDATA, etc. PR Close angular#42605
1 parent c873440 commit 9de65db

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

packages/compiler/src/ml_parser/lexer.ts

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ class _Tokenizer {
721721
}
722722

723723
private _isTextEnd(): boolean {
724-
if (this._cursor.peek() === chars.$LT || this._cursor.peek() === chars.$EOF) {
724+
if (this._isTagStart() || this._cursor.peek() === chars.$EOF) {
725725
return true;
726726
}
727727

@@ -740,6 +740,25 @@ class _Tokenizer {
740740
return false;
741741
}
742742

743+
/**
744+
* Returns true if the current cursor is pointing to the start of a tag
745+
* (opening/closing/comments/cdata/etc).
746+
*/
747+
private _isTagStart(): boolean {
748+
if (this._cursor.peek() === chars.$LT) {
749+
// We assume that `<` followed by whitespace is not the start of an HTML element.
750+
const tmp = this._cursor.clone();
751+
tmp.advance();
752+
// If the next character is alphabetic, `!` or `/`, then it is a tag start
753+
const code = tmp.peek();
754+
if ((chars.$a <= code && code <= chars.$z) || (chars.$A <= code && code <= chars.$Z) ||
755+
code === chars.$SLASH || code === chars.$BANG) {
756+
return true;
757+
}
758+
}
759+
return false;
760+
}
761+
743762
private _readUntil(char: number): string {
744763
const start = this._cursor.clone();
745764
this._attemptUntilChar(char);

packages/compiler/test/ml_parser/lexer_spec.ts

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
612612
]);
613613
});
614614

615-
it('should parse valid start tag in interpolation', () => {
615+
it('should break out of interpolation in text token on valid start tag', () => {
616616
expect(tokenizeAndHumanizeParts('{{ a <b && c > d }}')).toEqual([
617617
[lex.TokenType.TEXT, '{{ a '],
618618
[lex.TokenType.TAG_OPEN_START, '', 'b'],
@@ -624,6 +624,42 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
624624
]);
625625
});
626626

627+
it('should break out of interpolation in text token on valid comment', () => {
628+
expect(tokenizeAndHumanizeParts('{{ a }<!---->}')).toEqual([
629+
[lex.TokenType.TEXT, '{{ a }'],
630+
[lex.TokenType.COMMENT_START],
631+
[lex.TokenType.RAW_TEXT, ''],
632+
[lex.TokenType.COMMENT_END],
633+
[lex.TokenType.TEXT, '}'],
634+
[lex.TokenType.EOF],
635+
]);
636+
});
637+
638+
it('should break out of interpolation in text token on valid CDATA', () => {
639+
expect(tokenizeAndHumanizeParts('{{ a }<![CDATA[]]>}')).toEqual([
640+
[lex.TokenType.TEXT, '{{ a }'],
641+
[lex.TokenType.CDATA_START],
642+
[lex.TokenType.RAW_TEXT, ''],
643+
[lex.TokenType.CDATA_END],
644+
[lex.TokenType.TEXT, '}'],
645+
[lex.TokenType.EOF],
646+
]);
647+
});
648+
649+
it('should ignore invalid start tag in interpolation', () => {
650+
// Note that if the `<=` is considered an "end of text" then the following `{` would
651+
// incorrectly be considered part of an ICU.
652+
expect(tokenizeAndHumanizeParts(`<code>{{'<={'}}</code>`, {tokenizeExpansionForms: true}))
653+
.toEqual([
654+
[lex.TokenType.TAG_OPEN_START, '', 'code'],
655+
[lex.TokenType.TAG_OPEN_END],
656+
[lex.TokenType.TEXT, '{{\'<={\'}}'],
657+
[lex.TokenType.TAG_CLOSE, '', 'code'],
658+
[lex.TokenType.EOF],
659+
]);
660+
});
661+
662+
627663
it('should parse start tags quotes in place of an attribute name as text', () => {
628664
expect(tokenizeAndHumanizeParts('<t ">')).toEqual([
629665
[lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'],

0 commit comments

Comments
 (0)