Migrate relocatable_runner to use MethodMeta #316


Closed · wants to merge 2 commits
9 changes: 4 additions & 5 deletions exir/backend/test/demos/rpc/ExecutorBackend.cpp
@@ -77,20 +77,19 @@ class ExecutorBackend final : public PyTorchBackendInterface {
       runtime_allocator, MemoryAllocator);
   new (client_const_allocator) MemoryAllocator(0, nullptr);
 
-  auto num_buffers = method_meta->num_non_const_buffers();
-  size_t num_non_const_buffers = num_buffers - 1;
+  auto num_non_const_buffers = method_meta->num_non_const_buffers();
 
   uint8_t** non_const_buffers = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
       runtime_allocator, uint8_t*, num_non_const_buffers);
   MemoryAllocator* non_const_allocators = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
       runtime_allocator, MemoryAllocator, num_non_const_buffers);
 
-  for (size_t id = 1; id < num_buffers; ++id) {
+  for (size_t id = 0; id < num_non_const_buffers; ++id) {
     auto buffer_size = method_meta->non_const_buffer_size(id);
     uint8_t* buffer_i = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
         runtime_allocator, uint8_t, buffer_size.get());
-    non_const_buffers[id - 1] = buffer_i;
-    new (&non_const_allocators[id - 1])
+    non_const_buffers[id] = buffer_i;
+    new (&non_const_allocators[id])
         MemoryAllocator(static_cast<uint32_t>(buffer_size.get()), buffer_i);
   }
3 changes: 1 addition & 2 deletions extension/pybindings/pybindings.cpp
@@ -83,8 +83,7 @@ class Module final {
   for (size_t i = 0; i < program_->num_methods(); ++i) {
     auto name = program_->get_method_name(i).get();
     auto method_meta = program_->method_meta(name).get();
-    // 1 on purpose because non-const are 1 indexed
-    for (size_t j = 1; j < method_meta.num_non_const_buffers(); j++) {
+    for (size_t j = 0; j < method_meta.num_non_const_buffers(); j++) {
       int64_t buffer_size = method_meta.non_const_buffer_size(j).get();
       if (non_const_buffer_sizes.find(j) == non_const_buffer_sizes.end()) {
         non_const_buffer_sizes.insert({j, buffer_size});
9 changes: 7 additions & 2 deletions runtime/executor/method_meta.cpp
@@ -170,7 +170,10 @@ Result<TensorInfo> MethodMeta::output_tensor_meta(size_t index) const {
 }
 
 size_t MethodMeta::num_non_const_buffers() const {
-  return s_plan_->non_const_buffer_sizes()->size();
+  // Index zero is reserved internally, and we hide it from users. The number
+  // of buffers visible to users is one fewer than the size of this list in
+  // the program.
+  return s_plan_->non_const_buffer_sizes()->size() - 1;
 }
 
 Result<int64_t> MethodMeta::non_const_buffer_size(size_t index) const {
@@ -181,7 +184,9 @@ Result<int64_t> MethodMeta::non_const_buffer_size(size_t index) const {
"index %zu out of range. num_buffers: %zu",
index,
num_buffers);
return s_plan_->non_const_buffer_sizes()->Get(index);
// Index zero is reserved internally, and we hide it from users. Adjust the
// provided index to point to one of the actual buffers.
return s_plan_->non_const_buffer_sizes()->Get(index + 1);
}

} // namespace executor
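Taken together, these two adjustments give callers a zero-indexed view of the non-const buffers with the reserved internal entry hidden. The following is a minimal caller-side sketch, not part of this PR: the helper function name is made up, and it assumes the same torch::executor declarations and ET_CHECK/ET_LOG macros used in the files in this diff are in scope.

// Sketch: iterate the user-visible non-const buffers of a method with the
// 0-indexed MethodMeta API. No "+ 1"/"- 1" bookkeeping is needed anymore;
// the reserved internal index 0 is handled inside MethodMeta.
void log_non_const_buffer_sizes(Program* program, const char* method_name) {
  Result<MethodMeta> method_meta_result = program->method_meta(method_name);
  ET_CHECK(method_meta_result.ok());
  MethodMeta method_meta = method_meta_result.get();

  const size_t num_buffers = method_meta.num_non_const_buffers();
  for (size_t id = 0; id < num_buffers; ++id) {
    const size_t buffer_size = method_meta.non_const_buffer_size(id).get();
    ET_LOG(Info, "Non-const buffer %zu needs %zu bytes.", id, buffer_size);
  }
}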
6 changes: 3 additions & 3 deletions runtime/executor/test/method_meta_test.cpp
@@ -76,14 +76,14 @@ TEST_F(MethodMetaTest, MethodMetaApi) {
   EXPECT_EQ(method_meta->num_outputs(), 1);
 
   // Appropriate amount of non_const_buffers
-  EXPECT_EQ(method_meta->num_non_const_buffers(), 2);
+  EXPECT_EQ(method_meta->num_non_const_buffers(), 1);
 
   // Appropriate content of non_const_buffers
-  EXPECT_EQ(method_meta->non_const_buffer_size(1).get(), 48);
+  EXPECT_EQ(method_meta->non_const_buffer_size(0).get(), 48);
 
   // Invalid index Errors
   EXPECT_EQ(
-      method_meta->non_const_buffer_size(2).error(), Error::InvalidArgument);
+      method_meta->non_const_buffer_size(1).error(), Error::InvalidArgument);
 
   EXPECT_EQ(
       program_->method_meta("not_a_method").error(), Error::InvalidArgument);
35 changes: 17 additions & 18 deletions test/relocatable_runner.cpp
Expand Up @@ -29,10 +29,10 @@ using namespace torch::executor;
  * power down and then back up) in between two inference requests.
  *
  * For ExecuTorch to work efficiently in these environments, we want to
- * initialize the execution plan once once for the model and avoid
- * re-initializing it for every inference. This can be achieved by restricting
- * the runtime contexts (torch::executor::Program and torch::executor::Method)
- * to live in a pre-allocated, shared, and persistent memory.
+ * initialize the Method once for the model and avoid re-initializing it
+ * for every inference. This can be achieved by restricting the runtime contexts
+ * (torch::executor::Program and torch::executor::Method) to live in a
+ * pre-allocated, shared, and persistent memory.
  *
  * This tool demonstrates that the memory can be managed this way.
  */
@@ -79,8 +79,7 @@ Program* load_program(
 }
 
 MemoryManager* create_memory_manager(
-    Program* program,
-    const char* method_name,
+    MethodMeta* method_meta,
     MemoryAllocator& worker_allocator) {
   // Create the runtime allocator.
   auto* runtime_allocator =
@@ -89,18 +88,16 @@
   new (runtime_allocator) MemoryAllocator(sizeof(runtime_pool), runtime_pool);
 
   // Create the non-const allocator and the buffers it points to.
-  size_t num_non_const_buffers =
-      program->num_non_const_buffers(method_name).get();
+  size_t num_non_const_buffers = method_meta->num_non_const_buffers();
   MemoryAllocator* non_const_allocators =
-      worker_allocator.allocateList<MemoryAllocator>(num_non_const_buffers - 1);
-  for (size_t id = 1; id < num_non_const_buffers; ++id) {
-    const size_t buffer_size =
-        program->get_non_const_buffer_size(id, method_name).get();
+      worker_allocator.allocateList<MemoryAllocator>(num_non_const_buffers);
+  for (size_t id = 0; id < num_non_const_buffers; ++id) {
+    const size_t buffer_size = method_meta->non_const_buffer_size(id).get();
     ET_LOG(
         Info, "Setting up non-const buffer id %zu, size %zu.", id, buffer_size);
     void* buffer = worker_allocator.allocate(buffer_size);
     ET_CHECK(buffer != nullptr);
-    new (&non_const_allocators[id - 1])
+    new (&non_const_allocators[id])
         MemoryAllocator(buffer_size, (uint8_t*)buffer);
     ET_LOG(
         Info,
@@ -112,7 +109,7 @@
       worker_allocator.allocateInstance<HierarchicalAllocator>();
   ET_CHECK(non_const_allocator != nullptr);
   new (non_const_allocator)
-      HierarchicalAllocator(num_non_const_buffers - 1, non_const_allocators);
+      HierarchicalAllocator(num_non_const_buffers, non_const_allocators);
 
   // The constant allocator is not currently used, but must be provided.
   auto* const_allocator = worker_allocator.allocateInstance<MemoryAllocator>();
@@ -140,8 +137,11 @@ Method* init_method(
     MemoryAllocator& worker_allocator,
     std::vector<size_t>& input_sizes,
     std::vector<size_t>& output_sizes) {
+  Result<MethodMeta> method_meta = program->method_meta(method_name);
+  ET_CHECK(method_meta.ok());
+
   MemoryManager* memory_manager =
-      create_memory_manager(program, method_name, worker_allocator);
+      create_memory_manager(&method_meta.get(), worker_allocator);
 
   //
   // Create and load a method from the program, using the provided
@@ -227,7 +227,7 @@ void inference_loop(
   Error status = method->execute();
   ET_CHECK_MSG(
       status == Error::Ok,
-      "plan->execute() failed with status 0x%" PRIx32,
+      "method->execute() failed with status 0x%" PRIx32,
       status);
   ET_LOG(Info, "Model executed successfully.");
 }
@@ -285,8 +285,7 @@ int main(int argc, char** argv) {
   const char* method_name = nullptr;
   {
     // Use the first method in the program.
-    const size_t plan_index = 0;
-    const auto method_name_result = program->get_method_name(plan_index);
+    const auto method_name_result = program->get_method_name(0);
     ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
     method_name = *method_name_result;
   }