Skip to content

Commit 02f7cd5

Browse files
committed
generated zero-width characters
1 parent dc96342 commit 02f7cd5

File tree

1 file changed

+134
-24
lines changed

1 file changed

+134
-24
lines changed

lib/Parse/Lexer.cpp

Lines changed: 134 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,35 +1206,145 @@ static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
12061206
}
12071207
}
12081208

1209+
// ZeroWidthC - Unicode code points that diagnoseZeroWidth treats as invisible
// (zero-width) when they occur in a string literal delimiter.
//
// The entries are listed in strictly ascending order. diagnoseZeroWidth looks
// code points up with std::binary_search, so any entry added here must keep
// the table sorted or lookups will silently miss it.
const static uint32_t ZeroWidthC[] = {
1210+
// Code points that do not appear to be visible when rendered follow.
1211+
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
1212+
0x000b, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014,
1213+
0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c,
1214+
0x001d, 0x001e, 0x001f, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083,
1215+
0x0084, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c,
1216+
0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094,
1217+
0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
1218+
0x009d, 0x009e, 0x009f, 0x00ad, 0x0300, 0x0301, 0x0302, 0x0303,
1219+
0x0304, 0x0306, 0x0307, 0x0308, 0x0309, 0x030a, 0x030b, 0x030c,
1220+
0x030f, 0x0311, 0x031b, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
1221+
0x0328, 0x032d, 0x032e, 0x0330, 0x0331, 0x0332, 0x034f, 0x055f,
1222+
0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0615, 0x0616, 0x0617,
1223+
0x0618, 0x0619, 0x061a, 0x061c, 0x064b, 0x064c, 0x064d, 0x064e,
1224+
0x064f, 0x0650, 0x0651, 0x0652, 0x0653, 0x0654, 0x0655, 0x0656,
1225+
0x0657, 0x0658, 0x0659, 0x065a, 0x065b, 0x065c, 0x065d, 0x065e,
1226+
0x065f, 0x0670, 0x06d6, 0x06d7, 0x06d8, 0x06d9, 0x06da, 0x06db,
1227+
0x06dc, 0x06df, 0x06e0, 0x06e1, 0x06e2, 0x06e3, 0x06e4, 0x06e6,
1228+
0x06e7, 0x06e8, 0x06ea, 0x06eb, 0x06ec, 0x06ed, 0x070f, 0x0711,
1229+
0x0730, 0x0731, 0x0732, 0x0733, 0x0734, 0x0735, 0x0736, 0x0737,
1230+
0x0738, 0x0739, 0x073a, 0x073b, 0x073c, 0x073d, 0x073e, 0x073f,
1231+
0x0740, 0x0741, 0x0742, 0x0743, 0x0744, 0x0745, 0x0746, 0x0747,
1232+
0x0748, 0x0749, 0x074a, 0x07a6, 0x07a7, 0x07a8, 0x07a9, 0x07aa,
1233+
0x07ab, 0x07ac, 0x07ad, 0x07ae, 0x07af, 0x07b0, 0x07eb, 0x07ec,
1234+
0x07ed, 0x07ee, 0x07ef, 0x07f0, 0x07f1, 0x07f2, 0x07f3, 0x0816,
1235+
0x0817, 0x0818, 0x0819, 0x081b, 0x081c, 0x081d, 0x081e, 0x081f,
1236+
0x0820, 0x0821, 0x0822, 0x0823, 0x0825, 0x0826, 0x0827, 0x0829,
1237+
0x082a, 0x082b, 0x082c, 0x082d, 0x0858, 0x0859, 0x085a, 0x085b,
1238+
0x08d5, 0x08d6, 0x08d7, 0x08d8, 0x08d9, 0x08e0, 0x08e1, 0x08e2,
1239+
0x08e3, 0x08e4, 0x08e5, 0x08e6, 0x08e7, 0x08e8, 0x08e9, 0x08ea,
1240+
0x08eb, 0x08ec, 0x08ed, 0x08ee, 0x08ef, 0x08f0, 0x08f1, 0x08f2,
1241+
0x08f3, 0x08f4, 0x08f5, 0x08f6, 0x08f7, 0x08f8, 0x08f9, 0x08fb,
1242+
0x08fc, 0x08fd, 0x08fe, 0x08ff, 0x0f18, 0x0f19, 0x0f35, 0x0f37,
1243+
0x0f39, 0x0f72, 0x0f7a, 0x0f7b, 0x0f7c, 0x0f7d, 0x0f7e, 0x0f80,
1244+
0x0f82, 0x0f83, 0x0f84, 0x0f86, 0x0f87, 0x0fc6, 0x115f, 0x1160,
1245+
0x1712, 0x1713, 0x1714, 0x1732, 0x1733, 0x1752, 0x1753, 0x1772,
1246+
0x1773, 0x17b4, 0x17b5, 0x180b, 0x180c, 0x180d, 0x180e, 0x1920,
1247+
0x1921, 0x1922, 0x1927, 0x1928, 0x192a, 0x1932, 0x193a, 0x193b,
1248+
0x1a17, 0x1a18, 0x1a1b, 0x1a55, 0x1a56, 0x1a59, 0x1a5a, 0x1a5b,
1249+
0x1a5c, 0x1a5d, 0x1a5e, 0x1a60, 0x1a62, 0x1a65, 0x1a66, 0x1a67,
1250+
0x1a68, 0x1a69, 0x1a6a, 0x1a6c, 0x1a73, 0x1a74, 0x1a75, 0x1a76,
1251+
0x1a77, 0x1a78, 0x1a79, 0x1a7a, 0x1a7b, 0x1a7c, 0x1a7f, 0x1abe,
1252+
0x1b80, 0x1b81, 0x1ba1, 0x1ba2, 0x1ba3, 0x1ba4, 0x1ba5, 0x1ba8,
1253+
0x1ba9, 0x1bac, 0x1bad, 0x1be6, 0x1be8, 0x1be9, 0x1bed, 0x1bee,
1254+
0x1bef, 0x1bf0, 0x1bf1, 0x1c2c, 0x1c2d, 0x1c2e, 0x1c2f, 0x1c30,
1255+
0x1c31, 0x1c32, 0x1c33, 0x1c36, 0x1c37, 0x1ce1, 0x1cf2, 0x1cf3,
1256+
0x1cf7, 0x200b, 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b,
1257+
0x202c, 0x202d, 0x202e, 0x2060, 0x2061, 0x2062, 0x2063, 0x2064,
1258+
0x2065, 0x2066, 0x2067, 0x2068, 0x2069, 0x206a, 0x206b, 0x206c,
1259+
0x206d, 0x206e, 0x206f, 0x2cef, 0x2cf0, 0x2cf1, 0x2d7f, 0x3164,
1260+
0xa6f0, 0xa6f1, 0xa802, 0xa806, 0xa80b, 0xa825, 0xa826, 0xa8b6,
1261+
0xa8c4, 0xa948, 0xa949, 0xa94a, 0xa94b, 0xa94c, 0xa94d, 0xa94e,
1262+
0xa94f, 0xa950, 0xa951, 0xa953, 0xa961, 0xa962, 0xa963, 0xa964,
1263+
0xa965, 0xa966, 0xa967, 0xa968, 0xa969, 0xa96a, 0xa96b, 0xa96c,
1264+
0xa96d, 0xa96e, 0xa96f, 0xa970, 0xa971, 0xa972, 0xa973, 0xa974,
1265+
0xa975, 0xa976, 0xa977, 0xa978, 0xa979, 0xa97a, 0xa97b, 0xa97c,
1266+
0xa9e5, 0xaa7b, 0xaa7c, 0xaa7d, 0xaab0, 0xaab2, 0xaab3, 0xaab4,
1267+
0xaab7, 0xaab8, 0xaabe, 0xaabf, 0xaac1, 0xaaec, 0xaaed, 0xaaf6,
1268+
0xabe5, 0xabe8, 0xabe9, 0xabea, 0xabed, 0xd7b1, 0xd7b2, 0xd7b3,
1269+
0xd7b4, 0xd7b5, 0xd7b6, 0xd7b7, 0xd7b8, 0xd7b9, 0xd7ba, 0xd7bb,
1270+
0xd7bc, 0xd7bd, 0xd7be, 0xd7bf, 0xd7c0, 0xd7c1, 0xd7c2, 0xd7c3,
1271+
0xd7c4, 0xd7c5, 0xd7c6, 0xd7cc, 0xd7cd, 0xd7ce, 0xd7cf, 0xd7d0,
1272+
0xd7d1, 0xd7d2, 0xd7d3, 0xd7d4, 0xd7d5, 0xd7d6, 0xd7d7, 0xd7d8,
1273+
0xd7d9, 0xd7da, 0xd7db, 0xd7dc, 0xd7dd, 0xd7de, 0xd7df, 0xd7e0,
1274+
0xd7e1, 0xd7e2, 0xd7e3, 0xd7e4, 0xd7e5, 0xd7e6, 0xd7e7, 0xd7e8,
1275+
0xd7e9, 0xd7ea, 0xd7eb, 0xd7ec, 0xd7ed, 0xd7ee, 0xd7ef, 0xd7f0,
1276+
0xd7f1, 0xd7f2, 0xd7f3, 0xd7f4, 0xd7f5, 0xd7f6, 0xd7f7, 0xd7f8,
1277+
0xd7f9, 0xd7fa, 0xd7fb, 0xf850, 0xf85f, 0xf860, 0xf861, 0xf862,
1278+
0xf863, 0xf864, 0xf865, 0xf866, 0xf867, 0xf868, 0xf869, 0xf86a,
1279+
0xf86b, 0xf86c, 0xf86d, 0xf86e, 0xf86f, 0xf884, 0xf885, 0xf886,
1280+
0xf887, 0xf888, 0xf889, 0xf88a, 0xf88b, 0xf88c, 0xf88d, 0xf88e,
1281+
0xf88f, 0xf890, 0xf891, 0xf892, 0xf893, 0xf894, 0xf895, 0xf896,
1282+
0xf897, 0xf898, 0xf899, 0xf89f, 0xfbb2, 0xfbb3, 0xfbb4, 0xfbb5,
1283+
0xfbb6, 0xfbb7, 0xfbb8, 0xfbb9, 0xfbba, 0xfbbb, 0xfbbd, 0xfbbe,
1284+
0xfbbf, 0xfbc1, 0xfc5e, 0xfc5f, 0xfc60, 0xfc61, 0xfc62, 0xfc63,
1285+
0xfe0f, 0xfe20, 0xfe21, 0xfe22, 0xfe23, 0xfeff, 0xffa0, 0xfff0,
1286+
0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7, 0xfff8,
1287+
0xfff9, 0xfffa, 0xfffb, 0xfffc, 0x10a01, 0x10a02, 0x10a03, 0x10a05,
1288+
0x10a06, 0x10a0c, 0x10a0d, 0x10a0e, 0x10a0f, 0x10a38, 0x10a39, 0x10a3a,
1289+
0x11038, 0x11039, 0x1103a, 0x1103b, 0x1103c, 0x1103d, 0x1103e, 0x1103f,
1290+
0x11040, 0x11041, 0x11042, 0x11043, 0x11044, 0x11045, 0x11046, 0x11080,
1291+
0x11081, 0x110b1, 0x110b3, 0x110b4, 0x110b5, 0x110b6, 0x110b9, 0x110ba,
1292+
0x11a01, 0x11a02, 0x11a03, 0x11a04, 0x11a05, 0x11a06, 0x11a07, 0x11a08,
1293+
0x11a09, 0x11a0a, 0x11a33, 0x11a35, 0x11a36, 0x11a37, 0x11a38, 0x11a39,
1294+
0x11a3b, 0x11a3c, 0x11a3d, 0x11a3e, 0x11a51, 0x11a52, 0x11a53, 0x11a54,
1295+
0x11a55, 0x11a56, 0x11a57, 0x11a58, 0x11a59, 0x11a5a, 0x11a5b, 0x11a8a,
1296+
0x11a8b, 0x11a8c, 0x11a8d, 0x11a8e, 0x11a8f, 0x11a90, 0x11a91, 0x11a92,
1297+
0x11a93, 0x11a94, 0x11a95, 0x11a96, 0x11a97, 0x11a98, 0x11d31, 0x11d32,
1298+
0x11d33, 0x11d34, 0x11d35, 0x11d36, 0x11d3a, 0x11d3c, 0x11d3d, 0x11d3f,
1299+
0x11d40, 0x11d41, 0x11d43, 0x11d47, 0x1bc9d, 0x1bca0, 0x1bca1, 0x1bca2,
1300+
0x1bca3, 0x1d173, 0x1d174, 0x1d175, 0x1d176, 0x1d177, 0x1d178, 0x1d179,
1301+
0x1d17a, 0x1da00, 0x1da01, 0x1da02, 0x1da03, 0x1da04, 0x1da05, 0x1da06,
1302+
0x1da07, 0x1da08, 0x1da09, 0x1da0a, 0x1da0b, 0x1da0c, 0x1da0d, 0x1da0e,
1303+
0x1da0f, 0x1da10, 0x1da11, 0x1da12, 0x1da13, 0x1da14, 0x1da15, 0x1da16,
1304+
0x1da17, 0x1da18, 0x1da19, 0x1da1a, 0x1da1b, 0x1da1c, 0x1da1d, 0x1da1e,
1305+
0x1da1f, 0x1da20, 0x1da21, 0x1da22, 0x1da23, 0x1da24, 0x1da25, 0x1da26,
1306+
0x1da27, 0x1da28, 0x1da29, 0x1da2a, 0x1da2b, 0x1da2c, 0x1da2d, 0x1da2e,
1307+
0x1da2f, 0x1da30, 0x1da31, 0x1da32, 0x1da33, 0x1da34, 0x1da35, 0x1da36,
1308+
0x1da3b, 0x1da3c, 0x1da3d, 0x1da3e, 0x1da3f, 0x1da40, 0x1da41, 0x1da42,
1309+
0x1da43, 0x1da44, 0x1da45, 0x1da46, 0x1da47, 0x1da48, 0x1da49, 0x1da4a,
1310+
0x1da4b, 0x1da4c, 0x1da4d, 0x1da4e, 0x1da4f, 0x1da50, 0x1da51, 0x1da52,
1311+
0x1da53, 0x1da54, 0x1da55, 0x1da56, 0x1da57, 0x1da58, 0x1da59, 0x1da5a,
1312+
0x1da5b, 0x1da5c, 0x1da5d, 0x1da5e, 0x1da5f, 0x1da60, 0x1da61, 0x1da62,
1313+
0x1da63, 0x1da64, 0x1da65, 0x1da66, 0x1da67, 0x1da68, 0x1da69, 0x1da6a,
1314+
0x1da6b, 0x1da6c, 0x1da75, 0x1da84, 0x1da9b, 0x1da9c, 0x1da9d, 0x1da9e,
1315+
0x1da9f, 0x1daa1, 0x1daa2, 0x1daa3, 0x1daa4, 0x1daa5, 0x1daa6, 0x1daa7,
1316+
0x1daa8, 0x1daa9, 0x1daaa, 0x1daab, 0x1daac, 0x1daad, 0x1daae, 0x1daaf,
1317+
};
1318+
12091319
/// diagnoseZeroWidth - Check for and error zero-width characters in delimiters.
12101320
/// A non visible character in the middle of a delimter can be used to extend
12111321
/// the literal beyond what it would appear creating potential security bugs.
12121322
static bool diagnoseZeroWidth(const char *&CurPtr, DiagnosticEngine *Diags) {
12131323
// A way needs to be found to find the complete set of zero width chars or
1214-
// this security mitigation will be in vain. Current list was taken from:
1215-
// https://www.ptiglobal.com/2018/04/26/the-beauty-of-unicode-zero-width-characters/
1216-
// https://github.com/dblspk/web-app
1217-
// As this list may not be complete this code is not currently implemented.
1218-
// const char *TmpPtr = CurPtr;
1219-
// while (true) {
1220-
// switch (validateUTF8CharacterAndAdvance(TmpPtr, TmpPtr+3)) {
1221-
// case 0x200B: case 0x200C: case 0x200D: case 0x2060:
1222-
// case 0x2061: case 0x2062: case 0x2063: case 0x2064:
1223-
// case 0x206A: case 0x206B: case 0x206C: case 0x206D:
1224-
// case 0x206E: case 0x206F: case 0xFE00: case 0xFE01:
1225-
// case 0xFEFF:
1226-
// if (Diags)
1227-
// Diags->diagnose(Lexer::getSourceLoc(CurPtr),
1228-
// diag::lex_zerowidth_in_string_delimiter)
1229-
// .fixItRemoveChars(Lexer::getSourceLoc(CurPtr),
1230-
// Lexer::getSourceLoc(CurPtr + 3));
1231-
// CurPtr = TmpPtr;
1232-
// break;
1233-
// default:
1234-
// return true;
1235-
// }
1236-
// }
1237-
return true;
1324+
// this security mitigation will be in vain. Current list is generated using
1325+
// the display width of attributed strings checking when it does not change.
1326+
// The font used was SF Mono, 11pt, the default font of the Xcode editor.
1327+
static std::vector<uint32_t> ZeroWidthV;
1328+
if (!ZeroWidthV.size())
1329+
ZeroWidthV.assign(ZeroWidthC,
1330+
ZeroWidthC + sizeof ZeroWidthC/sizeof ZeroWidthC[0]);
1331+
1332+
const char *TmpPtr = CurPtr;
1333+
while (true) {
1334+
uint32_t NextChar = validateUTF8CharacterAndAdvance(TmpPtr, TmpPtr + 6);
1335+
if (NextChar != '"' && NextChar != '#' &&
1336+
(NextChar == ~0U || (NextChar >= 0xe0000 && NextChar <= 0xe0fff) ||
1337+
std::binary_search(ZeroWidthV.begin(), ZeroWidthV.end(), NextChar))) {
1338+
if (Diags)
1339+
Diags->diagnose(Lexer::getSourceLoc(CurPtr),
1340+
diag::lex_zerowidth_in_string_delimiter)
1341+
.fixItRemoveChars(Lexer::getSourceLoc(CurPtr),
1342+
Lexer::getSourceLoc(TmpPtr));
1343+
CurPtr = TmpPtr;
1344+
continue;
1345+
}
1346+
return true;
1347+
}
12381348
}
12391349

12401350
/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.

0 commit comments

Comments
 (0)