Skip to content

Commit 38c37ad

Browse files
committed
Remove support for (de)serializing arbitrary binary byte data between a raw data buffer and a UTF-8 string. Because of TextDecoder and the eager handling of 0 as a terminator byte, that never quite worked, so it is unlikely that anyone ever depended on that feature.
1 parent b83ccf2 commit 38c37ad

File tree

1 file changed

+12
-33
lines changed

1 file changed

+12
-33
lines changed

src/preamble.js

Lines changed: 12 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -458,36 +458,27 @@ function UTF8ArrayToString(u8Array, idx) {
458458
return UTF8Decoder.decode(u8Array.subarray(idx, endPtr));
459459
} else {
460460
#endif // TEXTDECODER
461-
var u0, u1, u2, u3, u4, u5;
462-
463461
var str = '';
464462
while (1) {
465463
// For UTF8 byte structure, see:
466464
// http://en.wikipedia.org/wiki/UTF-8#Description
467465
// https://www.ietf.org/rfc/rfc2279.txt
468466
// https://tools.ietf.org/html/rfc3629
469-
u0 = u8Array[idx++];
467+
var u0 = u8Array[idx++];
470468
if (!u0) return str;
471469
if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
472-
u1 = u8Array[idx++] & 63;
470+
var u1 = u8Array[idx++] & 63;
473471
if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
474-
u2 = u8Array[idx++] & 63;
472+
var u2 = u8Array[idx++] & 63;
475473
if ((u0 & 0xF0) == 0xE0) {
476474
u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
477475
} else {
478-
u3 = u8Array[idx++] & 63;
479-
if ((u0 & 0xF8) == 0xF0) {
480-
u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | u3;
481-
} else {
482-
u4 = u8Array[idx++] & 63;
483-
if ((u0 & 0xFC) == 0xF8) {
484-
u0 = ((u0 & 3) << 24) | (u1 << 18) | (u2 << 12) | (u3 << 6) | u4;
485-
} else {
486-
u5 = u8Array[idx++] & 63;
487-
u0 = ((u0 & 1) << 30) | (u1 << 24) | (u2 << 18) | (u3 << 12) | (u4 << 6) | u5;
488-
}
489-
}
476+
#if ASSERTIONS
477+
if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte 0x' + u0.toString(16) + ' encountered when deserializing a UTF-8 string on the asm.js/wasm heap to a JS string!');
478+
#endif
479+
u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (u8Array[idx++] & 63);
490480
}
481+
491482
if (u0 < 0x10000) {
492483
str += String.fromCharCode(u0);
493484
} else {
@@ -548,27 +539,15 @@ function stringToUTF8Array(str, outU8Array, outIdx, maxBytesToWrite) {
548539
outU8Array[outIdx++] = 0xE0 | (u >> 12);
549540
outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
550541
outU8Array[outIdx++] = 0x80 | (u & 63);
551-
} else if (u <= 0x1FFFFF) {
542+
} else {
552543
if (outIdx + 3 >= endIdx) break;
544+
#if ASSERTIONS
545+
if (u >= 0x200000) warnOnce('Invalid Unicode code point 0x' + u.toString(16) + ' encountered when serializing a JS string to an UTF-8 string on the asm.js/wasm heap! (Valid unicode code points should be in range 0-0x1FFFFF).');
546+
#endif
553547
outU8Array[outIdx++] = 0xF0 | (u >> 18);
554548
outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63);
555549
outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
556550
outU8Array[outIdx++] = 0x80 | (u & 63);
557-
} else if (u <= 0x3FFFFFF) {
558-
if (outIdx + 4 >= endIdx) break;
559-
outU8Array[outIdx++] = 0xF8 | (u >> 24);
560-
outU8Array[outIdx++] = 0x80 | ((u >> 18) & 63);
561-
outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63);
562-
outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
563-
outU8Array[outIdx++] = 0x80 | (u & 63);
564-
} else {
565-
if (outIdx + 5 >= endIdx) break;
566-
outU8Array[outIdx++] = 0xFC | (u >> 30);
567-
outU8Array[outIdx++] = 0x80 | ((u >> 24) & 63);
568-
outU8Array[outIdx++] = 0x80 | ((u >> 18) & 63);
569-
outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63);
570-
outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
571-
outU8Array[outIdx++] = 0x80 | (u & 63);
572551
}
573552
}
574553
// Null-terminate the pointer to the buffer.

0 commit comments

Comments
 (0)