emscripten-core
diff --git a/‎ChangeLog.md
Lines changed: 13 additions & 0 deletions b/‎ChangeLog.md
Lines changed: 13 additions & 0 deletions
diff --git a/‎src/embind/embind.js
Lines changed: 2 additions & 1 deletion b/‎src/embind/embind.js
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/jsifier.js
Lines changed: 1 addition & 0 deletions b/‎src/jsifier.js
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/library.js
Lines changed: 11 additions & 1 deletion b/‎src/library.js
Lines changed: 11 additions & 1 deletion
diff --git a/‎src/library_dylink.js
Lines changed: 1 addition & 0 deletions b/‎src/library_dylink.js
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/library_fs.js
Lines changed: 4 additions & 1 deletion b/‎src/library_fs.js
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/library_strings.js
Lines changed: 248 additions & 0 deletions b/‎src/library_strings.js
Lines changed: 248 additions & 0 deletions
@@ -20,6 +20,19 @@ See docs/process.md for more on how version tagging works.
 
 3.1.35 (in development)
 -----------------------
+- The following JavaScript runtime functions were converted to JavaScript
+  library functions:
+   - UTF8ArrayToString
+   - UTF8ToString
+   - stringToUTF8Array
+   - stringToUTF8
+   - lengthBytesUTF8
+  If you use any of these functions in your JS code you will now need to include
+  them explicitly in one of the following ways:
+   - Add them to a `__deps` entry in your JS library file (with leading $)
+   - Add them to `DEFAULT_LIBRARY_FUNCS_TO_INCLUDE` (with leading $)
+   - Add them to `EXPORTED_FUNCTIONS` (without leading $)
+   - Set `-sLEGACY_RUNTIME` to include all of them at once.
 - `allocateUTF8` and `allocateUTF8OnStack` library function moved to
   `library_legacy.js`.  Prefer the more accurately named `stringToNewUTF8` and
   `stringToNewUTF8OnStack`. (#19089)
 
@@ -656,7 +656,8 @@ var LibraryEmbind = {
   _embind_register_std_string__sig: 'vpp',
   _embind_register_std_string__deps: [
     '$readLatin1String', '$registerType',
-    '$simpleReadValueFromPointer', '$throwBindingError'],
+    '$simpleReadValueFromPointer', '$throwBindingError',
+    '$stringToUTF8', '$lengthBytesUTF8'],
   _embind_register_std_string: function(rawType, name) {
     name = readLatin1String(name);
     var stdStringIsUTF8
 
@@ -203,6 +203,7 @@ function ${name}(${args}) {
       'getWasmTableEntry',
       'runtimeKeepalivePush',
       'runtimeKeepalivePop',
+      'UTF8ToString',
     ];
     for (const dep of autoDeps) {
       if (snippet.includes(dep + '(')) {
 
@@ -2305,6 +2305,7 @@ mergeInto(LibraryManager.library, {
   // Mark as `noleakcheck` otherwise lsan will report the last returned string
   // as a leak.
   emscripten_run_script_string__noleakcheck: true,
+  emscripten_run_script_string__deps: ['$lengthBytesUTF8', '$stringToUTF8', 'malloc'],
   emscripten_run_script_string: function(ptr) {
     {{{ makeEval("var s = eval(UTF8ToString(ptr));") }}}
     if (s == null) {
@@ -2565,7 +2566,7 @@ mergeInto(LibraryManager.library, {
     return callstack;
   },
 
-  emscripten_get_callstack__deps: ['$getCallstack'],
+  emscripten_get_callstack__deps: ['$getCallstack', '$lengthBytesUTF8', '$stringToUTF8'],
   emscripten_get_callstack: function(flags, str, maxbytes) {
     // Use explicit calls to from64 rather then using the __sig
     // magic here.  This is because the __sig wrapper uses arrow function
@@ -2887,6 +2888,7 @@ mergeInto(LibraryManager.library, {
     return result ? result.column || 0 : 0;
   },
 
+  emscripten_get_module_name__deps: ['$stringToUTF8'],
   emscripten_get_module_name: function(buf, length) {
 #if MINIMAL_RUNTIME
     return stringToUTF8('{{{ TARGET_BASENAME }}}.wasm', buf, length);
@@ -3363,6 +3365,9 @@ mergeInto(LibraryManager.library, {
 
   // Use program_invocation_short_name and program_invocation_name in compiled
   // programs. This function is for implementing them.
+#if !MINIMAL_RUNTIME
+  _emscripten_get_progname__deps: ['$stringToUTF8'],
+#endif
   _emscripten_get_progname: function(str, len) {
 #if !MINIMAL_RUNTIME
 #if ASSERTIONS
@@ -3723,5 +3728,10 @@ DEFAULT_LIBRARY_FUNCS_TO_INCLUDE.push(
   '$ccall',
   '$cwrap',
   '$ExitStatus',
+  '$UTF8ArrayToString',
+  '$UTF8ToString',
+  '$stringToUTF8Array',
+  '$stringToUTF8',
+  '$lengthBytesUTF8',
 );
 #endif
@@ -370,6 +370,7 @@ var LibraryDylink = {
 
   // returns the side module metadata as an object
   // { memorySize, memoryAlign, tableSize, tableAlign, neededDynlibs}
+  $getDylinkMetadata__deps: ['$UTF8ArrayToString'],
   $getDylinkMetadata__internal: true,
   $getDylinkMetadata: function(binary) {
     var offset = 0;
 
@@ -5,7 +5,10 @@
  */
 
 mergeInto(LibraryManager.library, {
-  $FS__deps: ['$randomFill', '$PATH', '$PATH_FS', '$TTY', '$MEMFS', '$asyncLoad', '$intArrayFromString',
+  $FS__deps: ['$randomFill', '$PATH', '$PATH_FS', '$TTY', '$MEMFS', '$asyncLoad',
+    '$intArrayFromString',
+    '$stringToUTF8Array',
+    '$lengthBytesUTF8',
 #if LibraryManager.has('library_idbfs.js')
     '$IDBFS',
 #endif
 
@@ -7,7 +7,251 @@
 #include "arrayUtils.js"
 
 mergeInto(LibraryManager.library, {
+#if TEXTDECODER == 2
+  $UTF8Decoder: "new TextDecoder('utf8')",
+#elif TEXTDECODER == 1
+  $UTF8Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder('utf8') : undefined",
+#endif
+
+  $UTF8ArrayToString__docs: `
+  /**
+   * Given a pointer 'idx' to a null-terminated UTF8-encoded string in the given
+   * array that contains uint8 values, returns a copy of that string as a
+   * Javascript String object.
+   * heapOrArray is either a regular array, or a JavaScript typed array view.
+   * @param {number} idx
+   * @param {number=} maxBytesToRead
+   * @return {string}
+   */`,
+#if TEXTDECODER
+  $UTF8ArrayToString__deps: ['$UTF8Decoder'],
+#endif
+  $UTF8ArrayToString: function(heapOrArray, idx, maxBytesToRead) {
+#if CAN_ADDRESS_2GB
+    idx >>>= 0;
+#endif
+    var endIdx = idx + maxBytesToRead;
+#if TEXTDECODER
+    var endPtr = idx;
+    // TextDecoder needs to know the byte length in advance, it doesn't stop on
+    // null terminator by itself.  Also, use the length info to avoid running tiny
+    // strings through TextDecoder, since .subarray() allocates garbage.
+    // (As a tiny code save trick, compare endPtr against endIdx using a negation:
+    // with maxBytesToRead undefined, endIdx is NaN, so the limit never triggers)
+    while (heapOrArray[endPtr] && !(endPtr >= endIdx)) ++endPtr;
+#endif // TEXTDECODER
+
+#if TEXTDECODER == 2
+    return UTF8Decoder.decode(heapOrArray.buffer ? {{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}} : new Uint8Array(heapOrArray.slice(idx, endPtr)));
+#else // TEXTDECODER == 2
+#if TEXTDECODER
+    if (endPtr - idx > 16 && heapOrArray.buffer && UTF8Decoder) {
+      return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}});
+    }
+#endif // TEXTDECODER
+    var str = '';
+#if TEXTDECODER
+    // If building with TextDecoder, we have already computed the string length
+    // above, so test loop end condition against that
+    while (idx < endPtr) {
+#else
+    while (!(idx >= endIdx)) {
+#endif
+      // For UTF8 byte structure, see:
+      // http://en.wikipedia.org/wiki/UTF-8#Description
+      // https://www.ietf.org/rfc/rfc2279.txt
+      // https://tools.ietf.org/html/rfc3629
+      var u0 = heapOrArray[idx++];
+#if !TEXTDECODER
+      // If not building with TextDecoder enabled, we don't know the string
+      // length, so scan for \0 byte.
+      // If building with TextDecoder, we know exactly at what byte index the
+      // string ends, so checking for nulls here would be redundant.
+      if (!u0) return str;
+#endif
+      if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
+      var u1 = heapOrArray[idx++] & 63;
+      if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
+      var u2 = heapOrArray[idx++] & 63;
+      if ((u0 & 0xF0) == 0xE0) {
+        u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
+      } else {
+#if ASSERTIONS
+        if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte ' + ptrToString(u0) + ' encountered when deserializing a UTF-8 string in wasm memory to a JS string!');
+#endif
+        u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (heapOrArray[idx++] & 63);
+      }
+
+      if (u0 < 0x10000) {
+        str += String.fromCharCode(u0);
+      } else {
+        var ch = u0 - 0x10000;
+        str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF));
+      }
+    }
+    return str;
+#endif // TEXTDECODER == 2
+  },
+
+  $UTF8ToString__docs: `
+  /**
+   * Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the
+   * emscripten HEAP, returns a copy of that string as a Javascript String object.
+   *
+   * @param {number} ptr
+   * @param {number=} maxBytesToRead - An optional length that specifies the
+   *   maximum number of bytes to read. You can omit this parameter to scan the
+   *   string until the first \0 byte. If maxBytesToRead is passed, and the string
+   *   at [ptr, ptr+maxBytesToReadr[ contains a null byte in the middle, then the
+   *   string will cut short at that byte index (i.e. maxBytesToRead will not
+   *   produce a string of exact length [ptr, ptr+maxBytesToRead[) N.B. mixing
+   *   frequent uses of UTF8ToString() with and without maxBytesToRead may throw
+   *   JS JIT optimizations off, so it is worth to consider consistently using one
+   * @return {string}
+   */`,
+#if TEXTDECODER == 2
+  $UTF8ToString__deps: ['$UTF8Decoder'],
+#else
+  $UTF8ToString__deps: ['$UTF8ArrayToString'],
+#endif
+  $UTF8ToString: function(ptr, maxBytesToRead) {
+#if ASSERTIONS
+    assert(typeof ptr == 'number');
+#endif
+#if CAN_ADDRESS_2GB
+    ptr >>>= 0;
+#endif
+#if TEXTDECODER == 2
+    if (!ptr) return '';
+    var maxPtr = ptr + maxBytesToRead;
+    for (var end = ptr; !(end >= maxPtr) && HEAPU8[end];) ++end;
+    return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('HEAPU8', 'ptr', 'end') }}});
+#else
+    return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
+#endif
+  },
+
+  /**
+   * Copies the given Javascript String object 'str' to the given byte array at
+   * address 'outIdx', encoded in UTF8 form and null-terminated. The copy will
+   * require at most str.length*4+1 bytes of space in the HEAP.  Use the function
+   * lengthBytesUTF8 to compute the exact number of bytes (excluding null
+   * terminator) that this function will write.
+   *
+   * @param {string} str - The Javascript string to copy.
+   * @param {ArrayBufferView|Array<number>} heap - The array to copy to. Each
+   *                                               index in this array is assumed
+   *                                               to be one 8-bit element.
+   * @param {number} outIdx - The starting offset in the array to begin the copying.
+   * @param {number} maxBytesToWrite - The maximum number of bytes this function
+   *                                   can write to the array.  This count should
+   *                                   include the null terminator, i.e. if
+   *                                   maxBytesToWrite=1, only the null terminator
+   *                                   will be written and nothing else.
+   *                                   maxBytesToWrite=0 does not write any bytes
+   *                                   to the output, not even the null
+   *                                   terminator.
+   * @return {number} The number of bytes written, EXCLUDING the null terminator.
+   */
+  $stringToUTF8Array: function(str, heap, outIdx, maxBytesToWrite) {
+#if CAN_ADDRESS_2GB
+    outIdx >>>= 0;
+#endif
+    // Parameter maxBytesToWrite is not optional. Negative values, 0, null,
+    // undefined and false each don't write out any bytes.
+    if (!(maxBytesToWrite > 0))
+      return 0;
+
+    var startIdx = outIdx;
+    var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
+    for (var i = 0; i < str.length; ++i) {
+      // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
+      // unit, not a Unicode code point of the character! So decode
+      // UTF16->UTF32->UTF8.
+      // See http://unicode.org/faq/utf_bom.html#utf16-3
+      // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description
+      // and https://www.ietf.org/rfc/rfc2279.txt
+      // and https://tools.ietf.org/html/rfc3629
+      var u = str.charCodeAt(i); // possibly a lead surrogate
+      if (u >= 0xD800 && u <= 0xDFFF) {
+        var u1 = str.charCodeAt(++i);
+        u = 0x10000 + ((u & 0x3FF) << 10) | (u1 & 0x3FF);
+      }
+      if (u <= 0x7F) {
+        if (outIdx >= endIdx) break;
+        heap[outIdx++] = u;
+      } else if (u <= 0x7FF) {
+        if (outIdx + 1 >= endIdx) break;
+        heap[outIdx++] = 0xC0 | (u >> 6);
+        heap[outIdx++] = 0x80 | (u & 63);
+      } else if (u <= 0xFFFF) {
+        if (outIdx + 2 >= endIdx) break;
+        heap[outIdx++] = 0xE0 | (u >> 12);
+        heap[outIdx++] = 0x80 | ((u >> 6) & 63);
+        heap[outIdx++] = 0x80 | (u & 63);
+      } else {
+        if (outIdx + 3 >= endIdx) break;
+#if ASSERTIONS
+        if (u > 0x10FFFF) warnOnce('Invalid Unicode code point ' + ptrToString(u) + ' encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x10FFFF).');
+#endif
+        heap[outIdx++] = 0xF0 | (u >> 18);
+        heap[outIdx++] = 0x80 | ((u >> 12) & 63);
+        heap[outIdx++] = 0x80 | ((u >> 6) & 63);
+        heap[outIdx++] = 0x80 | (u & 63);
+      }
+    }
+    // Null-terminate the string just written into the buffer.
+    heap[outIdx] = 0;
+    return outIdx - startIdx;
+  },
+
+  /**
+   * Copies the given Javascript String object 'str' to the emscripten HEAP at
+   * address 'outPtr', null-terminated and encoded in UTF8 form. The copy will
+   * require at most str.length*4+1 bytes of space in the HEAP, and is truncated
+   * to fit maxBytesToWrite bytes (a count that includes the null terminator).
+   * Use lengthBytesUTF8 to compute the exact byte length (excluding terminator).
+   *
+   * @return {number} The number of bytes written, EXCLUDING the null terminator.
+   */
+  $stringToUTF8__deps: ['$stringToUTF8Array'],
+  $stringToUTF8: function(str, outPtr, maxBytesToWrite) {
+#if ASSERTIONS
+    assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
+#endif
+    return stringToUTF8Array(str, {{{ heapAndOffset('HEAPU8', 'outPtr') }}}, maxBytesToWrite);
+  },
+
+  /**
+   * Returns the number of bytes the given Javascript string takes if encoded as a
+   * UTF8 byte array, EXCLUDING the null terminator byte.
+   *
+   * @param {string} str - JavaScript string to operate on
+   * @return {number} Length, in bytes, of the UTF8 encoded string.
+   */
+  $lengthBytesUTF8: function(str) {
+    var len = 0;
+    for (var i = 0; i < str.length; ++i) {
+      // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
+      // unit, not a Unicode code point of the character! So classify each unit
+      // by the number of UTF-8 bytes its code point needs.
+      // See http://unicode.org/faq/utf_bom.html#utf16-3
+      var c = str.charCodeAt(i); // possibly a lead surrogate
+      if (c <= 0x7F) {
+        len++;
+      } else if (c <= 0x7FF) {
+        len += 2;
+      } else if (c >= 0xD800 && c <= 0xDFFF) {
+        len += 4; ++i; // surrogate: counted as a 4-byte pair, so the next code unit is skipped
+      } else {
+        len += 3;
+      }
+    }
+    return len;
+  },
+
   $intArrayFromString__docs: '/** @type {function(string, boolean=, number=)} */',
+  $intArrayFromString__deps: ['$lengthBytesUTF8', '$stringToUTF8Array'],
   $intArrayFromString: intArrayFromString,
   $intArrayToString: intArrayToString,
 
@@ -226,6 +470,7 @@ mergeInto(LibraryManager.library, {
 
   // Allocate heap space for a JS string, and write it there.
   // It is the responsibility of the caller to free() that memory.
+  $stringToNewUTF8__deps: ['$lengthBytesUTF8', '$stringToUTF8Array'],
   $stringToNewUTF8: function(str) {
     var size = lengthBytesUTF8(str) + 1;
     var ret = {{{ makeMalloc('stringToNewUTF8', 'size') }}};
@@ -234,6 +479,7 @@ mergeInto(LibraryManager.library, {
   },
 
   // Allocate stack space for a JS string, and write it there.
+  $stringToNewUTF8OnStack__deps: ['$lengthBytesUTF8', '$stringToUTF8Array'],
   $stringToNewUTF8OnStack: function(str) {
     var size = lengthBytesUTF8(str) + 1;
     var ret = stackAlloc(size);
@@ -247,6 +493,7 @@ mergeInto(LibraryManager.library, {
   // in a maximum length that can be used to be secure from out of bounds
   // writes.
   $writeStringToMemory__docs: '/** @deprecated @param {boolean=} dontAddNull */',
+  $writeStringToMemory__dpes: ['$lengthBytesUTF8', '$stringToUTF8Array'],
   $writeStringToMemory: function(string, buffer, dontAddNull) {
     warnOnce('writeStringToMemory is deprecated and should not be called! Use stringToUTF8() instead!');
 
@@ -280,4 +527,5 @@ mergeInto(LibraryManager.library, {
     // Null-terminate the pointer to the HEAP.
     if (!dontAddNull) {{{ makeSetValue('buffer', 0, 0, 'i8') }}};
   },
+
 });