Skip to content

Commit b6ea0f2

Browse files
committed
Add implementation of emscripten_memcpy_big based on bulk memory.
These new functions live in `libbulkmemory`, which only gets included if bulk memory is enabled (either directly via `-mbulk-memory` or indirectly via `-pthread`). Benchmark results for benchmark.test_memcpy_1mb: ``` v8: mean: 1.666 v8-bulkmemory: mean: 1.598 v8-standalone-bulkmemory: mean: 1.576 v8-standalone: mean: 3.197 ``` Here we can see that when bulk memory is enabled it's at least as fast as, if not faster than, the JS version. v8-standalone doesn't have emscripten_memcpy_big at all and is much slower, as expected. By adding `-mbulk-memory` the standalone version becomes just as fast as the non-standalone.
1 parent 578a13a commit b6ea0f2

14 files changed

+145
-17
lines changed

emcc.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,6 +1603,7 @@ def phase_setup(options, state, newargs):
16031603
if '-matomics' not in newargs:
16041604
newargs += ['-matomics']
16051605
if '-mbulk-memory' not in newargs:
1606+
settings.BULK_MEMORY = 1
16061607
newargs += ['-mbulk-memory']
16071608

16081609
if 'DISABLE_EXCEPTION_CATCHING' in user_settings and 'EXCEPTION_CATCHING_ALLOWED' in user_settings:
@@ -2440,6 +2441,8 @@ def phase_linker_setup(options, state, newargs):
24402441
settings.JS_LIBRARIES.append((0, shared.path_from_root('src', 'library_wasm_worker.js')))
24412442

24422443
settings.SUPPORTS_GLOBALTHIS = feature_matrix.caniuse(feature_matrix.Feature.GLOBALTHIS)
2444+
if not settings.BULK_MEMORY:
2445+
settings.BULK_MEMORY = feature_matrix.caniuse(feature_matrix.Feature.BULK_MEMORY)
24432446

24442447
if settings.AUDIO_WORKLET:
24452448
if not settings.SUPPORTS_GLOBALTHIS:
@@ -3571,6 +3574,8 @@ def consume_arg_file():
35713574
settings.DISABLE_EXCEPTION_CATCHING = 1
35723575
settings.DISABLE_EXCEPTION_THROWING = 1
35733576
settings.WASM_EXCEPTIONS = 0
3577+
elif arg == '-mbulk-memory':
3578+
settings.BULK_MEMORY = 1
35743579
elif arg == '-fexceptions':
35753580
# TODO Currently -fexceptions only means Emscripten EH. Switch to wasm
35763581
# exception handling by default when -fexceptions is given when wasm

src/library.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,11 @@ mergeInto(LibraryManager.library, {
389389
// variant, so we should never emit emscripten_memcpy_big() in the build.
390390
// In STANDALONE_WASM we avoid the emscripten_memcpy_big dependency so keep
391391
// the wasm file standalone.
392+
// In BULK_MEMORY mode we include native versions of these functions based
393+
// on memory.fill and memory.copy.
392394
// In MAIN_MODULE=1 or EMCC_FORCE_STDLIBS mode all of libc is force included
393395
// so we cannot override parts of it, and therefore cannot use libc_optz.
394-
#if (SHRINK_LEVEL < 2 || LINKABLE || process.env.EMCC_FORCE_STDLIBS) && !STANDALONE_WASM
396+
#if (SHRINK_LEVEL < 2 || LINKABLE || process.env.EMCC_FORCE_STDLIBS) && !STANDALONE_WASM && !BULK_MEMORY
395397

396398
#if MIN_CHROME_VERSION < 45 || MIN_EDGE_VERSION < 14 || MIN_FIREFOX_VERSION < 34 || MIN_IE_VERSION != TARGET_NOT_SUPPORTED || MIN_SAFARI_VERSION < 100101
397399
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray/copyWithin lists browsers that support TypedArray.prototype.copyWithin, but it

src/settings_internal.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,3 +256,5 @@ var POST_JS_FILES = [];
256256

257257
// Set when -pthread / -sPTHREADS is passed
258258
var PTHREADS = false;
259+
260+
var BULK_MEMORY = false;

system/lib/libc/emscripten_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ extern "C" {
3030
void emscripten_memcpy_big(void* __restrict__ dest,
3131
const void* __restrict__ src,
3232
size_t n) EM_IMPORT(emscripten_memcpy_big);
33+
void emscripten_memset_big(void* ptr, char value, size_t n);
3334

3435
void emscripten_notify_memory_growth(size_t memory_index);
3536

system/lib/libc/emscripten_memcpy.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static void *__memcpy(void *restrict dest, const void *restrict src, size_t n) {
2929
unsigned char *block_aligned_d_end;
3030
unsigned char *d_end;
3131

32-
#ifndef EMSCRIPTEN_STANDALONE_WASM
32+
#if !defined(EMSCRIPTEN_STANDALONE_WASM) || defined(__wasm_bulk_memory__)
3333
if (n >= 512) {
3434
emscripten_memcpy_big(dest, src, n);
3535
return dest;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifdef __wasm64__
2+
#define PTR i64
3+
#else
4+
#define PTR i32
5+
#endif
6+
7+
.globl emscripten_memcpy_big
8+
emscripten_memcpy_big:
9+
.functype emscripten_memcpy_big (PTR, PTR, PTR) -> ()
10+
local.get 0
11+
local.get 1
12+
local.get 2
13+
memory.copy 0, 0
14+
end_function

system/lib/libc/emscripten_memset.c

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
// XXX EMSCRIPTEN ASAN: build an uninstrumented version of memset
2-
#if defined(__EMSCRIPTEN__) && defined(__has_feature)
3-
#if __has_feature(address_sanitizer)
4-
#define memset __attribute__((no_sanitize("address"))) emscripten_builtin_memset
5-
#endif
1+
#include "emscripten_internal.h"
2+
3+
// build an uninstrumented version of memset
4+
#if defined(__has_feature) && __has_feature(address_sanitizer)
5+
__attribute__((no_sanitize("address"))) static void *memset(void *str, int c, size_t n);
6+
__attribute__((no_sanitize("address"))) void *__musl_memset(void *str, int c, size_t n);
67
#endif
78

89
#ifdef EMSCRIPTEN_OPTIMIZE_FOR_OZ
@@ -16,6 +17,21 @@ void *memset(void *str, int c, size_t n) {
1617
return str;
1718
}
1819

20+
#elif defined(__wasm_bulk_memory__)
21+
22+
__attribute__((__weak__)) void *__musl_memset(void *str, int c, size_t n);
23+
#define memset __musl_memset
24+
#include "musl/src/string/memset.c"
25+
#undef memset
26+
27+
void *memset(void *str, int c, size_t n) {
28+
if (n >= 512) {
29+
emscripten_memset_big(str, c, n);
30+
return str;
31+
}
32+
return __musl_memset(str, c, n);
33+
}
34+
1935
#else
2036

2137
#include "musl/src/string/memset.c"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifdef __wasm64__
2+
#define PTR i64
3+
#else
4+
#define PTR i32
5+
#endif
6+
7+
.globl emscripten_memset_big
8+
emscripten_memset_big:
9+
.functype emscripten_memset_big (PTR, i32, PTR) -> ()
10+
local.get 0
11+
local.get 1
12+
local.get 2
13+
memory.fill 0
14+
end_function

system/lib/standalone/standalone.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ int emscripten_resize_heap(size_t size) {
152152
}
153153

154154
double emscripten_get_now(void) {
155-
return (1000 * clock()) / (double)CLOCKS_PER_SEC;
155+
return (1000ll * clock()) / (double)CLOCKS_PER_SEC;
156156
}
157157

158158
// C++ ABI

test/other/test_memops_bulk_memory.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include <assert.h>
2+
#include <string.h>
3+
4+
const char *hello = "hello";
5+
const char *world = "world";
6+
7+
int main() {
8+
char buffer[100];
9+
memset(buffer, 'a', 100);
10+
memcpy(buffer, hello, strlen(hello) + 1);
11+
assert(strcmp(buffer, hello) == 0);
12+
return 0;
13+
}

test/test_benchmark.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import zlib
1414
from pathlib import Path
1515
from typing import List
16+
from subprocess import CalledProcessError
1617

1718
if __name__ == '__main__':
1819
raise Exception('do not run this file directly; do something like: test/runner.py benchmark')
@@ -83,6 +84,7 @@ def bench(self, args, output_parser=None, reps=TEST_REPS, expected_output=None):
8384
except Exception as e:
8485
print(str(e))
8586
print('Parsing benchmark results failed, output was: ' + output)
87+
return
8688
self.times.append(curr)
8789

8890
def display(self, baseline=None):
@@ -158,7 +160,14 @@ def build(self, parent, filename, args, shared_args, emcc_args, native_args, nat
158160
self.filename = final
159161

160162
def run(self, args):
161-
return run_process([self.filename] + args, stdout=PIPE, stderr=PIPE, check=False).stdout
163+
try:
164+
return run_process([self.filename] + args, stdout=PIPE, stderr=PIPE, check=False).stdout
165+
except CalledProcessError as e:
166+
print("-- STDOUT:")
167+
print(e.stdout)
168+
print("-- STDERR:")
169+
print(e.stderr)
170+
raise
162171

163172
def get_output_files(self):
164173
return [self.filename]
@@ -214,7 +223,7 @@ def build(self, parent, filename, args, shared_args, emcc_args, native_args, nat
214223
if common.EMTEST_FORCE64:
215224
cmd += ['--profiling']
216225
else:
217-
cmd += ['--closure=1', '-sMINIMAL_RUNTIME']
226+
cmd += ['--closure=1'] #, '-sMINIMAL_RUNTIME']
218227
# add additional emcc args at the end, which may override other things
219228
# above, such as minimal runtime
220229
cmd += emcc_args + self.extra_args
@@ -229,7 +238,14 @@ def build(self, parent, filename, args, shared_args, emcc_args, native_args, nat
229238
self.filename = final
230239

231240
def run(self, args):
232-
return jsrun.run_js(self.filename, engine=self.engine, args=args, stderr=PIPE)
241+
try:
242+
return jsrun.run_js(self.filename, engine=self.engine, args=args, stderr=PIPE)
243+
except CalledProcessError as e:
244+
print("-- STDOUT:")
245+
print(e.stdout)
246+
print("-- STDERR:")
247+
print(e.stderr)
248+
raise
233249

234250
def get_output_files(self):
235251
ret = [self.filename]
@@ -356,7 +372,7 @@ def get_output_files(self):
356372

357373
if not common.EMTEST_FORCE64:
358374
benchmarkers += [
359-
NativeBenchmarker('clang', [CLANG_CC], [CLANG_CXX]),
375+
#NativeBenchmarker('clang', [CLANG_CC], [CLANG_CXX]),
360376
# NativeBenchmarker('gcc', ['gcc', '-no-pie'], ['g++', '-no-pie'])
361377
]
362378

@@ -372,9 +388,17 @@ def get_output_files(self):
372388
]
373389
else:
374390
benchmarkers += [
375-
EmscriptenBenchmarker(default_v8_name, aot_v8),
376-
EmscriptenBenchmarker(default_v8_name + '-lto', aot_v8, ['-flto']),
377-
EmscriptenBenchmarker(default_v8_name + '-ctors', aot_v8, ['-sEVAL_CTORS']),
391+
EmscriptenBenchmarker(default_v8_name, aot_v8,
392+
['-fno-builtin']),
393+
EmscriptenBenchmarker(default_v8_name + '-bulkmemory', aot_v8,
394+
['-mbulk-memory', '-fno-builtin']),
395+
EmscriptenBenchmarker(default_v8_name + '-standalone-bulkmemory', aot_v8,
396+
['-sSTANDALONE_WASM', '-sWASM_BIGINT', '-mbulk-memory', '-fno-builtin']),
397+
EmscriptenBenchmarker(default_v8_name + '-standalone', aot_v8,
398+
['-sSTANDALONE_WASM', '-sWASM_BIGINT', '-fno-builtin']),
399+
#EmscriptenBenchmarker(default_v8_name, aot_v8),
400+
#EmscriptenBenchmarker(default_v8_name + '-lto', aot_v8, ['-flto']),
401+
#EmscriptenBenchmarker(default_v8_name + '-ctors', aot_v8, ['-sEVAL_CTORS']),
378402
# EmscriptenWasm2CBenchmarker('wasm2c')
379403
]
380404
if os.path.exists(CHEERP_BIN):

test/test_other.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13412,3 +13412,24 @@ def test_wasi_random_get(self):
1341213412
@requires_node
1341313413
def test_wasi_sched_yield(self):
1341413414
self.run_wasi_test_suite_test('wasi_sched_yield')
13415+
13416+
def test_memops_bulk_memory(self):
13417+
self.emcc_args += ['--profiling-funcs', '-fno-builtin']
13418+
# By default we expect to find _emscripten_memcpy_big in the generated JS and not in the
13419+
# native code.
13420+
self.do_runf(test_file('other/test_memops_bulk_memory.c'))
13421+
self.assertContained('_emscripten_memcpy_big', read_file('test_memops_bulk_memory.js'))
13422+
funcs = self.parse_wasm('test_memops_bulk_memory.wasm')[2]
13423+
self.assertNotIn('$emscripten_memcpy_big', funcs)
13424+
13425+
# With bulk memory enabled we expect *not* to find it.
13426+
self.do_runf(test_file('other/test_memops_bulk_memory.c'), emcc_args=['-mbulk-memory'])
13427+
self.assertNotContained('_emscripten_memcpy_big', read_file('test_memops_bulk_memory.js'))
13428+
funcs = self.parse_wasm('test_memops_bulk_memory.wasm')[2]
13429+
self.assertIn('$emscripten_memcpy_big', funcs)
13430+
13431+
# -pthread implicitly enables bulk memory too.
13432+
self.do_runf(test_file('other/test_memops_bulk_memory.c'), emcc_args=['-pthread'])
13433+
self.assertNotContained('_emscripten_memcpy_big', read_file('test_memops_bulk_memory.js'))
13434+
funcs = self.parse_wasm('test_memops_bulk_memory.wasm')[2]
13435+
self.assertIn('$emscripten_memcpy_big', funcs)

tools/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
'DEFAULT_TO_CXX',
7272
'WASM_OBJECT_FILES',
7373
'WASM_WORKERS',
74+
'BULK_MEMORY',
7475

7576
# Internal settings used during compilation
7677
'EXCEPTION_CATCHING_ALLOWED',

tools/system_libs.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,6 +1274,20 @@ def can_use(self):
12741274
not settings.LINKABLE and not os.environ.get('EMCC_FORCE_STDLIBS')
12751275

12761276

1277+
class libbulkmemory(MuslInternalLibrary,
1278+
DebugLibrary,
1279+
AsanInstrumentedLibrary,
1280+
MTLibrary):
1281+
name = 'libbulkmemory'
1282+
src_dir = 'system/lib/libc'
1283+
src_files = ['emscripten_memcpy.c', 'emscripten_memset.c',
1284+
'emscripten_memcpy_big.S', 'emscripten_memset_big.S']
1285+
cflags = ['-mbulk-memory']
1286+
1287+
def can_use(self):
1288+
return super(libbulkmemory, self).can_use() and settings.BULK_MEMORY
1289+
1290+
12771291
class libprintf_long_double(libc):
12781292
name = 'libprintf_long_double'
12791293
cflags = ['-DEMSCRIPTEN_PRINTF_LONG_DOUBLE']
@@ -1945,7 +1959,7 @@ def get_files(self):
19451959
'__main_void.c'])
19461960
files += files_in_path(
19471961
path='system/lib/libc',
1948-
filenames=['emscripten_memcpy.c'])
1962+
filenames=['emscripten_memcpy.c', 'emscripten_memset.c'])
19491963
# It is more efficient to use JS methods for time, normally.
19501964
files += files_in_path(
19511965
path='system/lib/libc/musl/src/time',
@@ -2154,7 +2168,8 @@ def add_sanitizer_libs():
21542168
if settings.SHRINK_LEVEL >= 2 and not settings.LINKABLE and \
21552169
not os.environ.get('EMCC_FORCE_STDLIBS'):
21562170
add_library('libc_optz')
2157-
2171+
if settings.BULK_MEMORY:
2172+
add_library('libbulkmemory')
21582173
if settings.STANDALONE_WASM:
21592174
add_library('libstandalonewasm')
21602175
if settings.ALLOW_UNIMPLEMENTED_SYSCALLS:

0 commit comments

Comments
 (0)