Skip to content

[executorch] Migrate runner-like targets to use the new MemoryManager API #403

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 31 additions & 48 deletions examples/bundled_executor_runner/bundled_executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@
#include <executorch/util/bundled_program_verification.h>
#include <executorch/util/util.h>

static constexpr size_t kRuntimeMemorySize = 4 * 1024U * 1024U; // 4 MB
static uint8_t runtime_pool[kRuntimeMemorySize];
static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4MB
static constexpr size_t kBundledAllocatorPoolSize = 16 * 1024U;
static uint8_t bundled_allocator_pool[kBundledAllocatorPoolSize];

Expand Down Expand Up @@ -138,70 +137,52 @@ int main(int argc, char** argv) {
// do it dynamically.
//

// The runtime allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded program. This allocator is only used during
// The method allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded method. This allocator is only used during
// loading a method of the program, which will return an error if there was
// not enough memory.
//
// The amount of memory required depends on the loaded program and the runtime
// The amount of memory required depends on the loaded method and the runtime
// code itself. The amount of memory here is usually determined by running the
// program and seeing how much memory is actually used, though it's possible
// to subclass MemoryAllocator so that it calls malloc() under the hood.

// In this example we using statically allocated gloabl runtime_pool of
// size kRuntimeMemorySize
MemoryAllocator runtime_allocator{
MemoryAllocator(kRuntimeMemorySize, runtime_pool)};
runtime_allocator.enable_profiling("runtime allocator");
// method and seeing how much memory is actually used, though it's possible to
// subclass MemoryAllocator so that it calls malloc() under the hood (see
// MallocMemoryAllocator).
//
// In this example we use a statically allocated memory pool.
MemoryAllocator method_allocator{
MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)};
method_allocator.enable_profiling("method allocator");

// The non-const buffers will back the mutable tensors used by the method. The
// sizes of these buffers were determined ahead of time during the
// The memory-planned buffers will back the mutable tensors used by the
// method. The sizes of these buffers were determined ahead of time during the
// memory-planning passes.
//
// Each buffer typically corresponds to a different hardware memory bank. Most
// mobile environments will only have a single buffer. Some embedded
// environments may have more than one for, e.g., slow/large DRAM and
// fast/small SRAM, or for memory associated with particular cores.
std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers;
std::vector<Span<uint8_t>> non_const_spans;
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
for (size_t id = 0; id < num_non_const_buffers; ++id) {
// .get() will always succeed because id < num_non_const_buffers.
std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
// .get() will always succeed because id < num_memory_planned_buffers.
size_t buffer_size =
static_cast<size_t>(method_meta->non_const_buffer_size(id).get());
ET_LOG(Info, "Setting up non-const buffer %zu, size %zu.", id, buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
non_const_spans.push_back({non_const_buffers.back().get(), buffer_size});
static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
planned_spans.push_back({planned_buffers.back().get(), buffer_size});
}
HierarchicalAllocator non_const_allocator(
{non_const_spans.data(), non_const_spans.size()});

// Allocator for bundled input.
MemoryAllocator bundled_input_allocator{
MemoryAllocator(kBundledAllocatorPoolSize, bundled_allocator_pool)};

// The constant allocator is not currently used. Please initialize with a
// zero-sized allocator.
MemoryAllocator const_allocator{MemoryAllocator(0, nullptr)};
const_allocator.enable_profiling("const allocator");

// The kernel temporary allocator is not currently used. Please initialize
// with a zero-sized allocator.
MemoryAllocator temp_allocator{MemoryAllocator(0, nullptr)};
temp_allocator.enable_profiling("temp allocator");
HierarchicalAllocator planned_memory(
{planned_spans.data(), planned_spans.size()});

// Assemble all of the allocators into the MemoryManager that the Executor
// will use.
MemoryManager memory_manager(
&const_allocator,
&non_const_allocator,
&runtime_allocator,
&temp_allocator);
MemoryManager memory_manager(&method_allocator, &planned_memory);

//
// Load method from the program, using the provided
// allocators. Running the method can mutate allocated non_const buffers,
// so should only be used by a single thread at at time, but it can be reused.
// Load the method from the program, using the provided allocators. Running
// the method can mutate the memory-planned buffers, so the method should only
// be used by a single thread at a time, but it can be reused.
//

Result<Method> method = program->load_method(method_name, &memory_manager);
Expand All @@ -214,6 +195,8 @@ int main(int argc, char** argv) {

// Prepare the inputs.
// Use ones-initialized inputs or bundled inputs.
MemoryAllocator bundled_input_allocator{
MemoryAllocator(kBundledAllocatorPoolSize, bundled_allocator_pool)};
exec_aten::ArrayRef<void*> inputs;
if (FLAGS_bundled_program) {
// Use the inputs embedded in the bundled program.
Expand Down
73 changes: 29 additions & 44 deletions examples/executor_runner/executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@
#include <executorch/runtime/platform/runtime.h>
#include <executorch/util/util.h>

static constexpr size_t kRuntimeMemorySize = 4 * 1024U * 1024U; // 4 MB
static uint8_t runtime_pool[kRuntimeMemorySize];
static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB

DEFINE_string(
model_path,
Expand Down Expand Up @@ -98,66 +97,52 @@ int main(int argc, char** argv) {
// do it dynamically.
//

// The runtime allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded program. This allocator is only used during
// The method allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded method. This allocator is only used during
// loading a method of the program, which will return an error if there was
// not enough memory.
//
// The amount of memory required depends on the loaded program and the runtime
// The amount of memory required depends on the loaded method and the runtime
// code itself. The amount of memory here is usually determined by running the
// program and seeing how much memory is actually used, though it's possible
// to subclass MemoryAllocator so that it calls malloc() under the hood.

// In this example we using statically allocated gloabl runtime_pool of
// size kRuntimeMemorySize
MemoryAllocator runtime_allocator{
MemoryAllocator(kRuntimeMemorySize, runtime_pool)};
runtime_allocator.enable_profiling("runtime allocator");
// method and seeing how much memory is actually used, though it's possible to
// subclass MemoryAllocator so that it calls malloc() under the hood (see
// MallocMemoryAllocator).
//
// In this example we use a statically allocated memory pool.
MemoryAllocator method_allocator{
MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)};
method_allocator.enable_profiling("method allocator");

// The non-const buffers will back the mutable tensors used by the method. The
// sizes of these buffers were determined ahead of time during the
// The memory-planned buffers will back the mutable tensors used by the
// method. The sizes of these buffers were determined ahead of time during the
// memory-planning passes.
//
// Each buffer typically corresponds to a different hardware memory bank. Most
// mobile environments will only have a single buffer. Some embedded
// environments may have more than one for, e.g., slow/large DRAM and
// fast/small SRAM, or for memory associated with particular cores.
std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers;
std::vector<Span<uint8_t>> non_const_spans;
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
for (size_t id = 0; id < num_non_const_buffers; ++id) {
// .get() will always succeed because id < num_non_const_buffers.
std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
// .get() will always succeed because id < num_memory_planned_buffers.
size_t buffer_size =
static_cast<size_t>(method_meta->non_const_buffer_size(id).get());
ET_LOG(Info, "Setting up non-const buffer %zu, size %zu.", id, buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
non_const_spans.push_back({non_const_buffers.back().get(), buffer_size});
static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
planned_spans.push_back({planned_buffers.back().get(), buffer_size});
}
HierarchicalAllocator non_const_allocator(
{non_const_spans.data(), non_const_spans.size()});

// The constant allocator is not currently used. Please initialize with a
// zero-sized allocator.
MemoryAllocator const_allocator{MemoryAllocator(0, nullptr)};
const_allocator.enable_profiling("const allocator");

// The kernel temporary allocator is not currently used. Please initialize
// with a zero-sized allocator.
MemoryAllocator temp_allocator{MemoryAllocator(0, nullptr)};
temp_allocator.enable_profiling("temp allocator");
HierarchicalAllocator planned_memory(
{planned_spans.data(), planned_spans.size()});

// Assemble all of the allocators into the MemoryManager that the Executor
// will use.
MemoryManager memory_manager(
&const_allocator,
&non_const_allocator,
&runtime_allocator,
&temp_allocator);
MemoryManager memory_manager(&method_allocator, &planned_memory);

//
// Load method from the program, using the provided
// allocators. Running the method can mutate allocated non_const buffers,
// so should only be used by a single thread at at time, but it can be reused.
// Load the method from the program, using the provided allocators. Running
// the method can mutate the memory-planned buffers, so the method should only
// be used by a single thread at a time, but it can be reused.
//

Result<Method> method = program->load_method(method_name, &memory_manager);
Expand Down
39 changes: 14 additions & 25 deletions exir/backend/test/demos/rpc/ExecutorBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,50 +73,39 @@ class ExecutorBackend final : public PyTorchBackendInterface {
}

// Building all different allocators for the client executor
auto client_const_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
runtime_allocator, MemoryAllocator);
new (client_const_allocator) MemoryAllocator(0, nullptr);

auto num_non_const_buffers = method_meta->num_non_const_buffers();
auto num_memory_planned_buffers = method_meta->num_memory_planned_buffers();

Span<uint8_t>* non_const_buffers = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, Span<uint8_t>, num_non_const_buffers);
Span<uint8_t>* memory_planned_buffers = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, Span<uint8_t>, num_memory_planned_buffers);

for (size_t id = 0; id < num_non_const_buffers; ++id) {
size_t buffer_size =
static_cast<size_t>(method_meta->non_const_buffer_size(id).get());
for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
size_t buffer_size = static_cast<size_t>(
method_meta->memory_planned_buffer_size(id).get());
uint8_t* buffer_i = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, uint8_t, buffer_size);
non_const_buffers[id] = {buffer_i, buffer_size};
memory_planned_buffers[id] = {buffer_i, buffer_size};
}

auto client_non_const_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
auto client_planned_memory = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
runtime_allocator, HierarchicalAllocator);
new (client_non_const_allocator)
HierarchicalAllocator({non_const_buffers, num_non_const_buffers});
new (client_planned_memory) HierarchicalAllocator(
{memory_planned_buffers, num_memory_planned_buffers});

// Allocate some memory from runtime allocator for the client executor, in
// real case, like if it's an executor in dsp, it should allocate memory
// dedicated to this specific hardware
auto client_runtime_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
auto client_method_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
runtime_allocator, MemoryAllocator);
const size_t kClientRuntimeMemorySize = 4 * 1024U;
auto runtime_pool = ET_ALLOCATE_OR_RETURN_ERROR(
runtime_allocator, kClientRuntimeMemorySize);
new (client_runtime_allocator) MemoryAllocator(
new (client_method_allocator) MemoryAllocator(
kClientRuntimeMemorySize, static_cast<uint8_t*>(runtime_pool));

auto client_temp_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
runtime_allocator, MemoryAllocator);
new (client_temp_allocator) MemoryAllocator(0, nullptr);

auto client_memory_manager =
ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(runtime_allocator, MemoryManager);
new (client_memory_manager) MemoryManager(
client_const_allocator,
client_non_const_allocator,
client_runtime_allocator,
client_temp_allocator);
new (client_memory_manager)
MemoryManager(client_method_allocator, client_planned_memory);

// Construct the client Method
Result<Method> method_res =
Expand Down
Loading