Skip to content

Commit f333684

Browse files
authored
Fix unicode escapes in jsx identifiers and extended unicode characters in jsdoc (#32716)
* Fix unicode escapes in JSX identifiers and extended unicode characters in JSDoc
* Support unicode escapes in JSDoc
* Add tests for extended escapes
1 parent 480b739 commit f333684

20 files changed

+455 additions, -13 deletions (lines changed)

src/compiler/parser.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7305,10 +7305,14 @@ namespace ts {
73057305
return createMissingNode<Identifier>(SyntaxKind.Identifier, /*reportAtCurrentPosition*/ !message, message || Diagnostics.Identifier_expected);
73067306
}
73077307

7308+
identifierCount++;
73087309
const pos = scanner.getTokenPos();
73097310
const end = scanner.getTextPos();
73107311
const result = <Identifier>createNode(SyntaxKind.Identifier, pos);
7311-
result.escapedText = escapeLeadingUnderscores(scanner.getTokenText());
7312+
if (token() !== SyntaxKind.Identifier) {
7313+
result.originalKeywordKind = token();
7314+
}
7315+
result.escapedText = escapeLeadingUnderscores(internIdentifier(scanner.getTokenValue()));
73127316
finishNode(result, end);
73137317

73147318
nextTokenJSDoc();

src/compiler/scanner.ts

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,7 +1015,7 @@ namespace ts {
10151015
}
10161016

10171017
function checkForIdentifierStartAfterNumericLiteral(numericStart: number, isScientific?: boolean) {
1018-
if (!isIdentifierStart(text.charCodeAt(pos), languageVersion)) {
1018+
if (!isIdentifierStart(codePointAt(text, pos), languageVersion)) {
10191019
return;
10201020
}
10211021

@@ -2063,17 +2063,22 @@ namespace ts {
20632063
// they allow dashes
20642064
function scanJsxIdentifier(): SyntaxKind {
20652065
if (tokenIsIdentifierOrKeyword(token)) {
2066-
const firstCharPosition = pos;
2066+
// An identifier or keyword has already been parsed - check for a `-` and then append it and everything after it to the token
2067+
// Do note that this means that `scanJsxIdentifier` effectively _mutates_ the visible token without advancing to a new token
2068+
// Any caller should be expecting this behavior and should only read the pos or token value after calling it.
20672069
while (pos < end) {
20682070
const ch = text.charCodeAt(pos);
2069-
if (ch === CharacterCodes.minus || ((firstCharPosition === pos) ? isIdentifierStart(ch, languageVersion) : isIdentifierPart(ch, languageVersion))) {
2071+
if (ch === CharacterCodes.minus) {
2072+
tokenValue += "-";
20702073
pos++;
2074+
continue;
20712075
}
2072-
else {
2076+
const oldPos = pos;
2077+
tokenValue += scanIdentifierParts(); // reuse `scanIdentifierParts` so unicode escapes are handled
2078+
if (pos === oldPos) {
20732079
break;
20742080
}
20752081
}
2076-
tokenValue += text.substring(firstCharPosition, pos);
20772082
}
20782083
return token;
20792084
}
@@ -2099,8 +2104,8 @@ namespace ts {
20992104
return token = SyntaxKind.EndOfFileToken;
21002105
}
21012106

2102-
const ch = text.charCodeAt(pos);
2103-
pos++;
2107+
const ch = codePointAt(text, pos);
2108+
pos += charSize(ch);
21042109
switch (ch) {
21052110
case CharacterCodes.tab:
21062111
case CharacterCodes.verticalTab:
@@ -2138,13 +2143,34 @@ namespace ts {
21382143
return token = SyntaxKind.DotToken;
21392144
case CharacterCodes.backtick:
21402145
return token = SyntaxKind.BacktickToken;
2141-
}
2146+
case CharacterCodes.backslash:
2147+
pos--;
2148+
const extendedCookedChar = peekExtendedUnicodeEscape();
2149+
if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) {
2150+
pos += 3;
2151+
tokenFlags |= TokenFlags.ExtendedUnicodeEscape;
2152+
tokenValue = scanExtendedUnicodeEscape() + scanIdentifierParts();
2153+
return token = getIdentifierToken();
2154+
}
21422155

2143-
if (isIdentifierStart(ch, ScriptTarget.Latest)) {
2144-
while (isIdentifierPart(text.charCodeAt(pos), ScriptTarget.Latest) && pos < end) {
2156+
const cookedChar = peekUnicodeEscape();
2157+
if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) {
2158+
pos += 6;
2159+
tokenValue = String.fromCharCode(cookedChar) + scanIdentifierParts();
2160+
return token = getIdentifierToken();
2161+
}
2162+
error(Diagnostics.Invalid_character);
21452163
pos++;
2146-
}
2164+
return token = SyntaxKind.Unknown;
2165+
}
2166+
2167+
if (isIdentifierStart(ch, languageVersion)) {
2168+
let char = ch;
2169+
while (pos < end && isIdentifierPart(char = codePointAt(text, pos), languageVersion)) pos += charSize(char);
21472170
tokenValue = text.substring(tokenPos, pos);
2171+
if (char === CharacterCodes.backslash) {
2172+
tokenValue += scanIdentifierParts();
2173+
}
21482174
return token = getIdentifierToken();
21492175
}
21502176
else {
@@ -2265,7 +2291,7 @@ namespace ts {
22652291

22662292
/* @internal */
22672293
function charSize(ch: number) {
2268-
if (ch > 0x10000) {
2294+
if (ch >= 0x10000) {
22692295
return 2;
22702296
}
22712297
return 1;

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.leadingAsterisk.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 13,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "TypeKeyword",
2021
"escapedText": "type"
2122
},
2223
"typeExpression": {

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.noLeadingAsterisk.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 13,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "TypeKeyword",
2021
"escapedText": "type"
2122
},
2223
"typeExpression": {

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.noReturnType.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 15,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "ReturnKeyword",
2021
"escapedText": "return"
2122
}
2223
},

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.returnTag1.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 15,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "ReturnKeyword",
2021
"escapedText": "return"
2122
},
2223
"typeExpression": {

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.returnTag2.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 15,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "ReturnKeyword",
2021
"escapedText": "return"
2122
},
2223
"typeExpression": {

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.typeTag.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 13,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "TypeKeyword",
2021
"escapedText": "type"
2122
},
2223
"typeExpression": {
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//// [file.js]
2+
/**
3+
* Adds
4+
* @param {number} 𝑚
5+
* @param {number} 𝑀
6+
*/
7+
function foo(𝑚, 𝑀) {
8+
console.log(𝑀 + 𝑚);
9+
}
10+
11+
//// [file.js]
12+
/**
13+
* Adds
14+
* @param {number} 𝑚
15+
* @param {number} 𝑀
16+
*/
17+
function foo(𝑚, 𝑀) {
18+
console.log(𝑀 + 𝑚);
19+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
=== tests/cases/compiler/file.js ===
2+
/**
3+
* Adds
4+
* @param {number} 𝑚
5+
* @param {number} 𝑀
6+
*/
7+
function foo(𝑚, 𝑀) {
8+
>foo : Symbol(foo, Decl(file.js, 0, 0))
9+
>𝑚 : Symbol(𝑚, Decl(file.js, 5, 13))
10+
>𝑀 : Symbol(𝑀, Decl(file.js, 5, 16))
11+
12+
console.log(𝑀 + 𝑚);
13+
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
14+
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
15+
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
16+
>𝑀 : Symbol(𝑀, Decl(file.js, 5, 16))
17+
>𝑚 : Symbol(𝑚, Decl(file.js, 5, 13))
18+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
=== tests/cases/compiler/file.js ===
2+
/**
3+
* Adds
4+
* @param {number} 𝑚
5+
* @param {number} 𝑀
6+
*/
7+
function foo(𝑚, 𝑀) {
8+
>foo : (𝑚: number, 𝑀: number) => void
9+
>𝑚 : number
10+
>𝑀 : number
11+
12+
console.log(𝑀 + 𝑚);
13+
>console.log(𝑀 + 𝑚) : void
14+
>console.log : (message?: any, ...optionalParams: any[]) => void
15+
>console : Console
16+
>log : (message?: any, ...optionalParams: any[]) => void
17+
>𝑀 + 𝑚 : number
18+
>𝑀 : number
19+
>𝑚 : number
20+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//// [file.js]
2+
/**
3+
* @param {number} \u0061
4+
* @param {number} a\u0061
5+
*/
6+
function foo(a, aa) {
7+
console.log(a + aa);
8+
}
9+
10+
/**
11+
* @param {number} \u{0061}
12+
* @param {number} a\u{0061}
13+
*/
14+
function bar(a, aa) {
15+
console.log(a + aa);
16+
}
17+
18+
19+
//// [file.js]
20+
/**
21+
* @param {number} \u0061
22+
* @param {number} a\u0061
23+
*/
24+
function foo(a, aa) {
25+
console.log(a + aa);
26+
}
27+
/**
28+
* @param {number} \u{0061}
29+
* @param {number} a\u{0061}
30+
*/
31+
function bar(a, aa) {
32+
console.log(a + aa);
33+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
=== tests/cases/compiler/file.js ===
2+
/**
3+
* @param {number} \u0061
4+
* @param {number} a\u0061
5+
*/
6+
function foo(a, aa) {
7+
>foo : Symbol(foo, Decl(file.js, 0, 0))
8+
>a : Symbol(a, Decl(file.js, 4, 13))
9+
>aa : Symbol(aa, Decl(file.js, 4, 15))
10+
11+
console.log(a + aa);
12+
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
13+
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
14+
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
15+
>a : Symbol(a, Decl(file.js, 4, 13))
16+
>aa : Symbol(aa, Decl(file.js, 4, 15))
17+
}
18+
19+
/**
20+
* @param {number} \u{0061}
21+
* @param {number} a\u{0061}
22+
*/
23+
function bar(a, aa) {
24+
>bar : Symbol(bar, Decl(file.js, 6, 1))
25+
>a : Symbol(a, Decl(file.js, 12, 13))
26+
>aa : Symbol(aa, Decl(file.js, 12, 15))
27+
28+
console.log(a + aa);
29+
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
30+
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
31+
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
32+
>a : Symbol(a, Decl(file.js, 12, 13))
33+
>aa : Symbol(aa, Decl(file.js, 12, 15))
34+
}
35+
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
=== tests/cases/compiler/file.js ===
2+
/**
3+
* @param {number} \u0061
4+
* @param {number} a\u0061
5+
*/
6+
function foo(a, aa) {
7+
>foo : (a: number, aa: number) => void
8+
>a : number
9+
>aa : number
10+
11+
console.log(a + aa);
12+
>console.log(a + aa) : void
13+
>console.log : (message?: any, ...optionalParams: any[]) => void
14+
>console : Console
15+
>log : (message?: any, ...optionalParams: any[]) => void
16+
>a + aa : number
17+
>a : number
18+
>aa : number
19+
}
20+
21+
/**
22+
* @param {number} \u{0061}
23+
* @param {number} a\u{0061}
24+
*/
25+
function bar(a, aa) {
26+
>bar : (a: number, aa: number) => void
27+
>a : number
28+
>aa : number
29+
30+
console.log(a + aa);
31+
>console.log(a + aa) : void
32+
>console.log : (message?: any, ...optionalParams: any[]) => void
33+
>console : Console
34+
>log : (message?: any, ...optionalParams: any[]) => void
35+
>a + aa : number
36+
>a : number
37+
>aa : number
38+
}
39+
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//// [file.tsx]
2+
import * as React from "react";
3+
declare global {
4+
namespace JSX {
5+
interface IntrinsicElements {
6+
"a-b": any;
7+
"a-c": any;
8+
}
9+
}
10+
}
11+
const Compa = (x: {x: number}) => <div>{"" + x}</div>;
12+
13+
let a = <\u0061></a>; // works
14+
let ab = <\u0061-b></a-b>; // works
15+
let ac = <a-\u0063></a-c>; // works
16+
let compa = <Comp\u0061 x={12} />; // works
17+
18+
let a2 = <\u{0061}></a>; // works
19+
let ab2 = <\u{0061}-b></a-b>; // works
20+
let ac2 = <a-\u{0063}></a-c>; // works
21+
let compa2 = <Comp\u{0061} x={12} />; // works
22+
23+
24+
//// [file.js]
25+
import * as React from "react";
26+
const Compa = (x) => React.createElement("div", null, "" + x);
27+
let a = React.createElement("a", null); // works
28+
let ab = React.createElement("a-b", null); // works
29+
let ac = React.createElement("a-c", null); // works
30+
let compa = React.createElement(Comp\u0061, { x: 12 }); // works
31+
let a2 = React.createElement("a", null); // works
32+
let ab2 = React.createElement("a-b", null); // works
33+
let ac2 = React.createElement("a-c", null); // works
34+
let compa2 = React.createElement(Comp\u{0061}, { x: 12 }); // works

0 commit comments

Comments
 (0)