
emmalloc: Fix allocations of high alignment #20704


Merged
merged 13 commits into from
Nov 22, 2023
25 changes: 19 additions & 6 deletions system/lib/emmalloc.c
@@ -671,12 +671,9 @@ static void *attempt_allocate(Region *freeRegion, size_t alignment, size_t size)
 
 static size_t validate_alloc_alignment(size_t alignment)
 {
-  // Cannot perform allocations that are less than 4 byte aligned, because the Region
-  // control structures need to be aligned. Also round up to minimum outputted alignment.
-  alignment = MAX(alignment, MALLOC_ALIGNMENT);
-  // Arbitrary upper limit on alignment - very likely a programming bug if alignment is higher than this.
-  assert(alignment <= 1024*1024);
-  return alignment;
+  // Cannot perform allocations that are less than our minimum alignment, because
+  // the Region control structures need to be aligned themselves.
+  return MAX(alignment, MALLOC_ALIGNMENT);
 }

static size_t validate_alloc_size(size_t size)
@@ -808,6 +805,22 @@ static void *allocate_memory(size_t alignment, size_t size)
 
   // We were unable to find a free memory region. Must sbrk() in more memory!
   size_t numBytesToClaim = size+sizeof(Region)*3;
+  // Take into account the alignment as well. For typical alignment we don't
+  // need to add anything here (so we do nothing if the alignment is equal to
+  // MALLOC_ALIGNMENT), but it can matter if the alignment is very high. In that
+  // case, not adding the alignment can lead to this sbrk not giving us enough
+  // (in which case, the next attempt fails and will sbrk the same amount again,
+  // potentially allocating a lot more memory than necessary).
Collaborator: I'm not sure I understand the status quo... did this work at all without this change?

Member Author: It worked, but it would do the recursion in the earlier code twice. So it allocated a lot more than it needed, until it finally found enough room for the right size with the right alignment.

Collaborator: Wouldn't iteratively doing this actually use less memory in some cases? i.e. you could get lucky and get a good alignment without quite as much over-allocation?

With this change we are guaranteed to waste alignment bytes on each allocation, aren't we? In the mimalloc case that would be ~4MB for each allocation. Or do we return the over-allocated chunk to the free list?

Member Author: I believe we potentially reuse the extra space. Here is where the early unused space is added to the region before us:

  // Before we proceed further, fix up the boundary between this and the preceding region,
  // so that the boundary between the two regions happens at a right spot for the payload to be aligned.
  if (payloadStartPtr != payloadStartPtrAligned)
  {
    Region *prevRegion = prev_region((Region*)freeRegion);
    // We never have two free regions adjacent to each other, so the region before this free
    // region should be in use.
    assert(region_is_in_use(prevRegion));
    size_t regionBoundaryBumpAmount = payloadStartPtrAligned - payloadStartPtr;
    size_t newThisRegionSize = freeRegion->size - regionBoundaryBumpAmount;
    create_used_region(prevRegion, prevRegion->size + regionBoundaryBumpAmount);
    freeRegion = (Region *)((uint8_t*)freeRegion + regionBoundaryBumpAmount);
    freeRegion->size = newThisRegionSize;
  }
However, that previous region is in use, so this means that region uses more than it needs. It might end up using that extra space if it reallocs, or the space would be freed entirely when the region is freed, but this could be better, that's true.

Overall, emmalloc's goal is to be compact, not the most optimal allocator, so I think this is a reasonable compromise. It's true that we might get lucky without this, as you said, and just happen to be aligned, but at large alignments that becomes less and less likely, and it's better to do the more straightforward and less risky thing.

Collaborator: I think we should wait for @juj to weigh in here.

Member Author: Talking offline, @juj might not be available to review this for a bit and suggested it land for now if it is urgent, and be reviewed later. I think that makes sense, as I'd like to get this in to avoid mimalloc using 2x the memory it needs in the first release with mimalloc. @sbc100, does that sound ok?

+  //
+  // Note that this is not necessarily optimal, as the extra allocation size for
+  // the alignment might not be needed (if we are lucky and already aligned),
+  // and even if it helps us allocate it will not immediately be ready for reuse
+  // (as it will be added to the currently-in-use region before us, so it is not
+  // in a free list). As a compromise however it seems reasonable in practice as
+  // a way to handle large aligned regions to avoid even worse waste.
+  if (alignment > MALLOC_ALIGNMENT) {
+    numBytesToClaim += alignment;
+  }
   assert(numBytesToClaim > size); // 32-bit wraparound should not happen here, allocation size has been validated above!
   bool success = claim_more_memory(numBytesToClaim);
   if (success)
8 changes: 4 additions & 4 deletions test/code_size/embind_val_wasm.json
@@ -3,8 +3,8 @@
   "a.html.gz": 431,
   "a.js": 7498,
   "a.js.gz": 3142,
-  "a.wasm": 11533,
-  "a.wasm.gz": 5767,
-  "total": 19704,
-  "total_gz": 9340
+  "a.wasm": 11515,
+  "a.wasm.gz": 5771,
+  "total": 19686,
+  "total_gz": 9344
 }
8 changes: 4 additions & 4 deletions test/code_size/hello_wasm_worker_wasm.json
@@ -3,8 +3,8 @@
   "a.html.gz": 433,
   "a.js": 667,
   "a.js.gz": 458,
-  "a.wasm": 1863,
-  "a.wasm.gz": 1051,
-  "total": 3267,
-  "total_gz": 1942
+  "a.wasm": 1858,
+  "a.wasm.gz": 1058,
+  "total": 3262,
+  "total_gz": 1949
 }
8 changes: 4 additions & 4 deletions test/code_size/hello_webgl2_wasm.json
@@ -3,8 +3,8 @@
   "a.html.gz": 379,
   "a.js": 4699,
   "a.js.gz": 2419,
-  "a.wasm": 10475,
-  "a.wasm.gz": 6710,
-  "total": 15743,
-  "total_gz": 9508
+  "a.wasm": 10468,
+  "a.wasm.gz": 6719,
+  "total": 15736,
+  "total_gz": 9517
 }
8 changes: 4 additions & 4 deletions test/code_size/hello_webgl2_wasm2js.json
@@ -1,10 +1,10 @@
 {
   "a.html": 567,
   "a.html.gz": 379,
-  "a.js": 17921,
-  "a.js.gz": 8079,
+  "a.js": 17841,
+  "a.js.gz": 8088,
   "a.mem": 3171,
   "a.mem.gz": 2713,
-  "total": 21659,
-  "total_gz": 11171
+  "total": 21579,
+  "total_gz": 11180
 }
8 changes: 4 additions & 4 deletions test/code_size/hello_webgl_wasm.json
@@ -3,8 +3,8 @@
   "a.html.gz": 379,
   "a.js": 4185,
   "a.js.gz": 2243,
-  "a.wasm": 10475,
-  "a.wasm.gz": 6710,
-  "total": 15229,
-  "total_gz": 9332
+  "a.wasm": 10468,
+  "a.wasm.gz": 6719,
+  "total": 15222,
+  "total_gz": 9341
 }
8 changes: 4 additions & 4 deletions test/code_size/hello_webgl_wasm2js.json
@@ -1,10 +1,10 @@
 {
   "a.html": 567,
   "a.html.gz": 379,
-  "a.js": 17399,
-  "a.js.gz": 7910,
+  "a.js": 17319,
+  "a.js.gz": 7908,
   "a.mem": 3171,
   "a.mem.gz": 2713,
-  "total": 21137,
-  "total_gz": 11002
+  "total": 21057,
+  "total_gz": 11000
 }
32 changes: 32 additions & 0 deletions test/other/test_emmalloc_high_align.c
@@ -0,0 +1,32 @@
+#include <assert.h>
+#include <emscripten/console.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+size_t sizeT(void* p) {
+  return (size_t)p;
+}
+
+int main() {
+  const size_t MB = 1024 * 1024;
+  const size_t ALIGN = 4 * MB;
+  const size_t SIZE = 32 * MB;
+
+  // Allocate a very large chunk of memory (32MB) with very high alignment (4
+  // MB). This is similar to what mimalloc does in practice.
+  void* before = sbrk(0);
+  void* p = aligned_alloc(ALIGN, SIZE);
+  void* after = sbrk(0);
+  emscripten_console_logf("before: %p after: %p p: %p\n", before, after, p);
+
+  // The allocation must be properly aligned.
+  assert(sizeT(p) % ALIGN == 0);
+
+  // We should only have sbrk'd a reasonable amount (this is a regression test
+  // for #20645 where we sbrk'd double the necessary amount). We expect at most
+  // 36 MB (the size of the allocation plus the alignment) plus a few bytes of
+  // general overhead.
+  assert(sizeT(after) - sizeT(before) < ALIGN + SIZE + 100);
+
+  emscripten_console_log("success");
+}
1 change: 1 addition & 0 deletions test/other/test_emmalloc_high_align.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
+success
4 changes: 4 additions & 0 deletions test/test_other.py
@@ -7494,6 +7494,10 @@ def test(args, text=None):
     test(['-sALLOW_MEMORY_GROWTH', '-sMAXIMUM_MEMORY=1GB'])
     test(['-sALLOW_MEMORY_GROWTH', '-sMAXIMUM_MEMORY=4GB'])
 
+  def test_emmalloc_high_align(self):
+    self.do_other_test('test_emmalloc_high_align.c',
+                       emcc_args=['-sMALLOC=emmalloc', '-sINITIAL_MEMORY=128MB'])
+
   def test_2GB_plus(self):
     # when the heap size can be over 2GB, we rewrite pointers to be unsigned
     def test(page_diff):