Skip to content

Commit f919243

Browse files
committed
Move runtime_strings.js code into library_strings.js
1 parent af217b3 commit f919243

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+334
-300
lines changed

ChangeLog.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,19 @@ See docs/process.md for more on how version tagging works.
2020

2121
3.1.35 (in development)
2222
-----------------------
23+
- The following JavaScript runtime functions were converted to JavaScript
24+
library functions:
25+
- UTF8ArrayToString
26+
- UTF8ToString
27+
- stringToUTF8Array
28+
- stringToUTF8
29+
- lengthBytesUTF8
30+
If you use any of these functions in your JS code you will now need to include
31+
them explicitly in one of the following ways:
32+
- Add them to a `__deps` entry in your JS library file (with leading $)
33+
- Add them to `DEFAULT_LIBRARY_FUNCS_TO_INCLUDE` (with leading $)
34+
- Add them to `EXPORTED_FUNCTIONS` (without leading $)
35+
- Set `-sLEGACY_RUNTIME` to include all of them at once.
2336
- `allocateUTF8` and `allocateUTF8OnStack` library function moved to
2437
`library_legacy.js`. Prefer the more accurately named `stringToNewUTF8` and
2538
`stringToNewUTF8OnStack`. (#19089)

src/embind/embind.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,8 @@ var LibraryEmbind = {
656656
_embind_register_std_string__sig: 'vpp',
657657
_embind_register_std_string__deps: [
658658
'$readLatin1String', '$registerType',
659-
'$simpleReadValueFromPointer', '$throwBindingError'],
659+
'$simpleReadValueFromPointer', '$throwBindingError',
660+
'$stringToUTF8', '$lengthBytesUTF8'],
660661
_embind_register_std_string: function(rawType, name) {
661662
name = readLatin1String(name);
662663
var stdStringIsUTF8

src/jsifier.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ function ${name}(${args}) {
203203
'getWasmTableEntry',
204204
'runtimeKeepalivePush',
205205
'runtimeKeepalivePop',
206+
'UTF8ToString',
206207
];
207208
for (const dep of autoDeps) {
208209
if (snippet.includes(dep + '(')) {

src/library.js

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2305,6 +2305,7 @@ mergeInto(LibraryManager.library, {
23052305
// Mark as `noleakcheck` otherwise lsan will report the last returned string
23062306
// as a leak.
23072307
emscripten_run_script_string__noleakcheck: true,
2308+
emscripten_run_script_string__deps: ['$lengthBytesUTF8', '$stringToUTF8', 'malloc'],
23082309
emscripten_run_script_string: function(ptr) {
23092310
{{{ makeEval("var s = eval(UTF8ToString(ptr));") }}}
23102311
if (s == null) {
@@ -2565,7 +2566,7 @@ mergeInto(LibraryManager.library, {
25652566
return callstack;
25662567
},
25672568

2568-
emscripten_get_callstack__deps: ['$getCallstack'],
2569+
emscripten_get_callstack__deps: ['$getCallstack', '$lengthBytesUTF8', '$stringToUTF8'],
25692570
emscripten_get_callstack: function(flags, str, maxbytes) {
25702571
// Use explicit calls to from64 rather than using the __sig
25712572
// magic here. This is because the __sig wrapper uses arrow function
@@ -2887,6 +2888,7 @@ mergeInto(LibraryManager.library, {
28872888
return result ? result.column || 0 : 0;
28882889
},
28892890

2891+
emscripten_get_module_name__deps: ['$stringToUTF8'],
28902892
emscripten_get_module_name: function(buf, length) {
28912893
#if MINIMAL_RUNTIME
28922894
return stringToUTF8('{{{ TARGET_BASENAME }}}.wasm', buf, length);
@@ -3363,6 +3365,9 @@ mergeInto(LibraryManager.library, {
33633365

33643366
// Use program_invocation_short_name and program_invocation_name in compiled
33653367
// programs. This function is for implementing them.
3368+
#if !MINIMAL_RUNTIME
3369+
_emscripten_get_progname__deps: ['$stringToUTF8'],
3370+
#endif
33663371
_emscripten_get_progname: function(str, len) {
33673372
#if !MINIMAL_RUNTIME
33683373
#if ASSERTIONS
@@ -3723,5 +3728,10 @@ DEFAULT_LIBRARY_FUNCS_TO_INCLUDE.push(
37233728
'$ccall',
37243729
'$cwrap',
37253730
'$ExitStatus',
3731+
'$UTF8ArrayToString',
3732+
'$UTF8ToString',
3733+
'$stringToUTF8Array',
3734+
'$stringToUTF8',
3735+
'$lengthBytesUTF8',
37263736
);
37273737
#endif

src/library_dylink.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ var LibraryDylink = {
370370

371371
// returns the side module metadata as an object
372372
// { memorySize, memoryAlign, tableSize, tableAlign, neededDynlibs}
373+
$getDylinkMetadata__deps: ['$UTF8ArrayToString'],
373374
$getDylinkMetadata__internal: true,
374375
$getDylinkMetadata: function(binary) {
375376
var offset = 0;

src/library_fs.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
*/
66

77
mergeInto(LibraryManager.library, {
8-
$FS__deps: ['$randomFill', '$PATH', '$PATH_FS', '$TTY', '$MEMFS', '$asyncLoad', '$intArrayFromString',
8+
$FS__deps: ['$randomFill', '$PATH', '$PATH_FS', '$TTY', '$MEMFS', '$asyncLoad',
9+
'$intArrayFromString',
10+
'$stringToUTF8Array',
11+
'$lengthBytesUTF8',
912
#if LibraryManager.has('library_idbfs.js')
1013
'$IDBFS',
1114
#endif

src/library_strings.js

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,251 @@
77
#include "arrayUtils.js"
88

99
mergeInto(LibraryManager.library, {
10+
#if TEXTDECODER == 2
11+
$UTF8Decoder: "new TextDecoder('utf8')",
12+
#elif TEXTDECODER == 1
13+
$UTF8Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder('utf8') : undefined",
14+
#endif
15+
16+
$UTF8ArrayToString__docs: `
17+
/**
18+
* Given a pointer 'idx' to a null-terminated UTF8-encoded string in the given
19+
* array that contains uint8 values, returns a copy of that string as a
20+
* Javascript String object.
21+
* heapOrArray is either a regular array, or a JavaScript typed array view.
22+
* @param {number} idx
23+
* @param {number=} maxBytesToRead
24+
* @return {string}
25+
*/`,
26+
#if TEXTDECODER
27+
$UTF8ArrayToString__deps: ['$UTF8Decoder'],
28+
#endif
29+
$UTF8ArrayToString: function(heapOrArray, idx, maxBytesToRead) {
30+
#if CAN_ADDRESS_2GB
31+
idx >>>= 0;
32+
#endif
33+
var endIdx = idx + maxBytesToRead;
34+
#if TEXTDECODER
35+
var endPtr = idx;
36+
// TextDecoder needs to know the byte length in advance, it doesn't stop on
37+
// null terminator by itself. Also, use the length info to avoid running tiny
38+
// strings through TextDecoder, since .subarray() allocates garbage.
39+
// (As a tiny code save trick, compare endPtr against endIdx using a negation,
40+
// so that undefined means Infinity)
41+
while (heapOrArray[endPtr] && !(endPtr >= endIdx)) ++endPtr;
42+
#endif // TEXTDECODER
43+
44+
#if TEXTDECODER == 2
45+
return UTF8Decoder.decode(heapOrArray.buffer ? {{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}} : new Uint8Array(heapOrArray.slice(idx, endPtr)));
46+
#else // TEXTDECODER == 2
47+
#if TEXTDECODER
48+
if (endPtr - idx > 16 && heapOrArray.buffer && UTF8Decoder) {
49+
return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}});
50+
}
51+
#endif // TEXTDECODER
52+
var str = '';
53+
#if TEXTDECODER
54+
// If building with TextDecoder, we have already computed the string length
55+
// above, so test loop end condition against that
56+
while (idx < endPtr) {
57+
#else
58+
while (!(idx >= endIdx)) {
59+
#endif
60+
// For UTF8 byte structure, see:
61+
// http://en.wikipedia.org/wiki/UTF-8#Description
62+
// https://www.ietf.org/rfc/rfc2279.txt
63+
// https://tools.ietf.org/html/rfc3629
64+
var u0 = heapOrArray[idx++];
65+
#if !TEXTDECODER
66+
// If not building with TextDecoder enabled, we don't know the string
67+
// length, so scan for \0 byte.
68+
// If building with TextDecoder, we know exactly at what byte index the
69+
// string ends, so checking for nulls here would be redundant.
70+
if (!u0) return str;
71+
#endif
72+
if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
73+
var u1 = heapOrArray[idx++] & 63;
74+
if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
75+
var u2 = heapOrArray[idx++] & 63;
76+
if ((u0 & 0xF0) == 0xE0) {
77+
u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
78+
} else {
79+
#if ASSERTIONS
80+
if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte ' + ptrToString(u0) + ' encountered when deserializing a UTF-8 string in wasm memory to a JS string!');
81+
#endif
82+
u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (heapOrArray[idx++] & 63);
83+
}
84+
85+
if (u0 < 0x10000) {
86+
str += String.fromCharCode(u0);
87+
} else {
88+
var ch = u0 - 0x10000;
89+
str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF));
90+
}
91+
}
92+
return str;
93+
#endif // TEXTDECODER == 2
94+
},
95+
96+
$UTF8ToString__docs: `
97+
/**
98+
* Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the
99+
* emscripten HEAP, returns a copy of that string as a Javascript String object.
100+
*
101+
* @param {number} ptr
102+
* @param {number=} maxBytesToRead - An optional length that specifies the
103+
* maximum number of bytes to read. You can omit this parameter to scan the
104+
* string until the first \0 byte. If maxBytesToRead is passed, and the string
105+
* at [ptr, ptr+maxBytesToRead[ contains a null byte in the middle, then the
106+
* string will cut short at that byte index (i.e. maxBytesToRead will not
107+
* produce a string of exact length [ptr, ptr+maxBytesToRead[) N.B. mixing
108+
* frequent uses of UTF8ToString() with and without maxBytesToRead may throw
109+
* JS JIT optimizations off, so it is worth consistently using one form or the other.
110+
* @return {string}
111+
*/`,
112+
#if TEXTDECODER == 2
113+
$UTF8ToString__deps: ['$UTF8Decoder'],
114+
#else
115+
$UTF8ToString__deps: ['$UTF8ArrayToString'],
116+
#endif
117+
$UTF8ToString: function(ptr, maxBytesToRead) {
118+
#if ASSERTIONS
119+
assert(typeof ptr == 'number');
120+
#endif
121+
#if CAN_ADDRESS_2GB
122+
ptr >>>= 0;
123+
#endif
124+
#if TEXTDECODER == 2
125+
if (!ptr) return '';
126+
var maxPtr = ptr + maxBytesToRead;
127+
for (var end = ptr; !(end >= maxPtr) && HEAPU8[end];) ++end;
128+
return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('HEAPU8', 'ptr', 'end') }}});
129+
#else
130+
return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
131+
#endif
132+
},
133+
134+
/**
135+
* Copies the given Javascript String object 'str' to the given byte array at
136+
* address 'outIdx', encoded in UTF8 form and null-terminated. The copy will
137+
* require at most str.length*4+1 bytes of space in the HEAP. Use the function
138+
* lengthBytesUTF8 to compute the exact number of bytes (excluding null
139+
* terminator) that this function will write.
140+
*
141+
* @param {string} str - The Javascript string to copy.
142+
* @param {ArrayBufferView|Array<number>} heap - The array to copy to. Each
143+
* index in this array is assumed
144+
* to be one 8-bit element.
145+
* @param {number} outIdx - The starting offset in the array to begin the copying.
146+
* @param {number} maxBytesToWrite - The maximum number of bytes this function
147+
* can write to the array. This count should
148+
* include the null terminator, i.e. if
149+
* maxBytesToWrite=1, only the null terminator
150+
* will be written and nothing else.
151+
* maxBytesToWrite=0 does not write any bytes
152+
* to the output, not even the null
153+
* terminator.
154+
* @return {number} The number of bytes written, EXCLUDING the null terminator.
155+
*/
156+
$stringToUTF8Array: function(str, heap, outIdx, maxBytesToWrite) {
157+
#if CAN_ADDRESS_2GB
158+
outIdx >>>= 0;
159+
#endif
160+
// Parameter maxBytesToWrite is not optional. Negative values, 0, null,
161+
// undefined and false each don't write out any bytes.
162+
if (!(maxBytesToWrite > 0))
163+
return 0;
164+
165+
var startIdx = outIdx;
166+
var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
167+
for (var i = 0; i < str.length; ++i) {
168+
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
169+
// unit, not a Unicode code point of the character! So decode
170+
// UTF16->UTF32->UTF8.
171+
// See http://unicode.org/faq/utf_bom.html#utf16-3
172+
// For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description
173+
// and https://www.ietf.org/rfc/rfc2279.txt
174+
// and https://tools.ietf.org/html/rfc3629
175+
var u = str.charCodeAt(i); // possibly a lead surrogate
176+
if (u >= 0xD800 && u <= 0xDFFF) {
177+
var u1 = str.charCodeAt(++i);
178+
u = 0x10000 + ((u & 0x3FF) << 10) | (u1 & 0x3FF);
179+
}
180+
if (u <= 0x7F) {
181+
if (outIdx >= endIdx) break;
182+
heap[outIdx++] = u;
183+
} else if (u <= 0x7FF) {
184+
if (outIdx + 1 >= endIdx) break;
185+
heap[outIdx++] = 0xC0 | (u >> 6);
186+
heap[outIdx++] = 0x80 | (u & 63);
187+
} else if (u <= 0xFFFF) {
188+
if (outIdx + 2 >= endIdx) break;
189+
heap[outIdx++] = 0xE0 | (u >> 12);
190+
heap[outIdx++] = 0x80 | ((u >> 6) & 63);
191+
heap[outIdx++] = 0x80 | (u & 63);
192+
} else {
193+
if (outIdx + 3 >= endIdx) break;
194+
#if ASSERTIONS
195+
if (u > 0x10FFFF) warnOnce('Invalid Unicode code point ' + ptrToString(u) + ' encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x10FFFF).');
196+
#endif
197+
heap[outIdx++] = 0xF0 | (u >> 18);
198+
heap[outIdx++] = 0x80 | ((u >> 12) & 63);
199+
heap[outIdx++] = 0x80 | ((u >> 6) & 63);
200+
heap[outIdx++] = 0x80 | (u & 63);
201+
}
202+
}
203+
// Null-terminate the pointer to the buffer.
204+
heap[outIdx] = 0;
205+
return outIdx - startIdx;
206+
},
207+
208+
/**
209+
* Copies the given Javascript String object 'str' to the emscripten HEAP at
210+
* address 'outPtr', null-terminated and encoded in UTF8 form. The copy will
211+
* require at most str.length*4+1 bytes of space in the HEAP.
212+
* Use the function lengthBytesUTF8 to compute the exact number of bytes
213+
* (excluding null terminator) that this function will write.
214+
*
215+
* @return {number} The number of bytes written, EXCLUDING the null terminator.
216+
*/
217+
$stringToUTF8__deps: ['$stringToUTF8Array'],
218+
$stringToUTF8: function(str, outPtr, maxBytesToWrite) {
219+
#if ASSERTIONS
220+
assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
221+
#endif
222+
return stringToUTF8Array(str, {{{ heapAndOffset('HEAPU8', 'outPtr') }}}, maxBytesToWrite);
223+
},
224+
225+
/**
226+
* Returns the number of bytes the given Javascript string takes if encoded as a
227+
* UTF8 byte array, EXCLUDING the null terminator byte.
228+
*
229+
* @param {string} str - JavaScript string to operate on
230+
* @return {number} Length, in bytes, of the UTF8 encoded string.
231+
*/
232+
$lengthBytesUTF8: function(str) {
233+
var len = 0;
234+
for (var i = 0; i < str.length; ++i) {
235+
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
236+
// unit, not a Unicode code point of the character! So decode
237+
// UTF16->UTF32->UTF8.
238+
// See http://unicode.org/faq/utf_bom.html#utf16-3
239+
var c = str.charCodeAt(i); // possibly a lead surrogate
240+
if (c <= 0x7F) {
241+
len++;
242+
} else if (c <= 0x7FF) {
243+
len += 2;
244+
} else if (c >= 0xD800 && c <= 0xDFFF) {
245+
len += 4; ++i;
246+
} else {
247+
len += 3;
248+
}
249+
}
250+
return len;
251+
},
252+
10253
$intArrayFromString__docs: '/** @type {function(string, boolean=, number=)} */',
254+
$intArrayFromString__deps: ['$lengthBytesUTF8', '$stringToUTF8Array'],
11255
$intArrayFromString: intArrayFromString,
12256
$intArrayToString: intArrayToString,
13257

@@ -226,6 +470,7 @@ mergeInto(LibraryManager.library, {
226470

227471
// Allocate heap space for a JS string, and write it there.
228472
// It is the responsibility of the caller to free() that memory.
473+
$stringToNewUTF8__deps: ['$lengthBytesUTF8', '$stringToUTF8Array'],
229474
$stringToNewUTF8: function(str) {
230475
var size = lengthBytesUTF8(str) + 1;
231476
var ret = {{{ makeMalloc('stringToNewUTF8', 'size') }}};
@@ -234,6 +479,7 @@ mergeInto(LibraryManager.library, {
234479
},
235480

236481
// Allocate stack space for a JS string, and write it there.
482+
$stringToNewUTF8OnStack__deps: ['$lengthBytesUTF8', '$stringToUTF8Array'],
237483
$stringToNewUTF8OnStack: function(str) {
238484
var size = lengthBytesUTF8(str) + 1;
239485
var ret = stackAlloc(size);
@@ -247,6 +493,7 @@ mergeInto(LibraryManager.library, {
247493
// in a maximum length that can be used to be secure from out of bounds
248494
// writes.
249495
$writeStringToMemory__docs: '/** @deprecated @param {boolean=} dontAddNull */',
496+
$writeStringToMemory__dpes: ['$lengthBytesUTF8', '$stringToUTF8Array'],
250497
$writeStringToMemory: function(string, buffer, dontAddNull) {
251498
warnOnce('writeStringToMemory is deprecated and should not be called! Use stringToUTF8() instead!');
252499

@@ -280,4 +527,5 @@ mergeInto(LibraryManager.library, {
280527
// Null-terminate the pointer to the HEAP.
281528
if (!dontAddNull) {{{ makeSetValue('buffer', 0, 0, 'i8') }}};
282529
},
530+
283531
});

0 commit comments

Comments
 (0)