Skip to content

Commit 8c047b1

Browse files
authored
Simplify + optimize lengthBytesUTF8. Don't fully decode surrogates (#17413)
1 parent 14c106a commit 8c047b1

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

src/runtime_strings.js

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -191,12 +191,16 @@ function lengthBytesUTF8(str) {
191 191
for (var i = 0; i < str.length; ++i) {
192 192
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character! So decode UTF16->UTF32->UTF8.
193 193
// See http://unicode.org/faq/utf_bom.html#utf16-3
194-
var u = str.charCodeAt(i); // possibly a lead surrogate
195-
if (u >= 0xD800 && u <= 0xDFFF) u = 0x10000 + ((u & 0x3FF) << 10) | (str.charCodeAt(++i) & 0x3FF);
196-
if (u <= 0x7F) ++len;
197-
else if (u <= 0x7FF) len += 2;
198-
else if (u <= 0xFFFF) len += 3;
199-
else len += 4;
194+
var c = str.charCodeAt(i); // possibly a lead surrogate
195+
if (c <= 0x7F) {
196+
len++;
197+
} else if (c <= 0x7FF) {
198+
len += 2;
199+
} else if (c >= 0xD800 && c <= 0xDFFF) {
200+
len += 4; ++i;
201+
} else {
202+
len += 3;
203+
}
200 204
}
201 205
return len;
202 206
}

0 commit comments

Comments
 (0)