Skip to content

Make sdk/executor_runner use MethodMeta interface and adjust non-const indices #317

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 22 additions & 30 deletions examples/executor_runner/executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,21 @@ int main(int argc, char** argv) {
ET_LOG(Info, "Model file %s is loaded.", model_path);

// Use the first method in the program.
const size_t plan_index = 0;
const char* method_name = nullptr;
{
const auto method_name_result = program->get_method_name(plan_index);
const auto method_name_result = program->get_method_name(0);
ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
method_name = *method_name_result;
}
ET_LOG(Info, "Running method %s", method_name);
ET_LOG(Info, "Using method %s", method_name);

// MethodMeta describes the memory requirements of the method.
Result<MethodMeta> method_meta = program->method_meta(method_name);
ET_CHECK_MSG(
method_meta.ok(),
"Failed to get method_meta for %s: 0x%x",
method_name,
(unsigned int)method_meta.error());

//
// The runtime does not use malloc/new; it allocates all memory using the
Expand Down Expand Up @@ -116,20 +123,9 @@ int main(int argc, char** argv) {
// have more than one for, e.g., slow/large DRAM and fast/small SRAM.
std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers;
std::vector<MemoryAllocator> non_const_allocators;
size_t num_non_const_buffers = 0;
{
auto result = program->num_non_const_buffers(method_name);
ET_CHECK_MSG(
result.ok(),
"Failed to get number of non-const buffers for method %s: 0x%x",
method_name,
(unsigned int)result.error());
num_non_const_buffers = *result;
}
// Note that this loop starts at ID 1, because ID 0 is reserved. But, the
// HierarchicalAllocator indices are zero-based, so it's later adjusted by -1.
for (size_t id = 1; id < num_non_const_buffers; ++id) {
auto buffer_size = program->get_non_const_buffer_size(id, method_name);
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
for (size_t id = 0; id < num_non_const_buffers; ++id) {
auto buffer_size = method_meta->non_const_buffer_size(id);
ET_CHECK_MSG(
buffer_size.ok(),
"Failed to get size of non-const buffer %zu for method %s: 0x%x",
Expand All @@ -139,8 +135,6 @@ int main(int argc, char** argv) {
ET_LOG(
Info, "Setting up non-const buffer %zu, size %zu.", id, *buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(*buffer_size));
// Since the list of allocators began empty, buffer ID N will live at index
// N-1.
non_const_allocators.push_back(
MemoryAllocator(*buffer_size, non_const_buffers.back().get()));
non_const_allocators.back().enable_profiling("non_const_allocators");
Expand Down Expand Up @@ -194,19 +188,17 @@ int main(int argc, char** argv) {
status);
ET_LOG(Info, "Model executed successfully.");

auto output_list =
runtime_allocator.allocateList<EValue>(method->outputs_size());
status = method->get_outputs(output_list, method->outputs_size());
// Print the outputs.
std::vector<EValue> outputs(method->outputs_size());
status = method->get_outputs(outputs.data(), outputs.size());
ET_CHECK(status == Error::Ok);
// The following code assumes all output EValues are floating point
// tensors. We need to handle other types of EValues and tensor
// dtypes. Furthermore, we need a util to print tensors in a more
// interpretable (e.g. size, dtype) and readable way.
// TODO for the above at T159700776
for (size_t i = 0; i < method->outputs_size(); i++) {
auto output_tensor = output_list[i].toTensor();
for (EValue& output : outputs) {
// TODO(T159700776): This assumes that all outputs are fp32 tensors. Add
// support for other EValues and Tensor dtypes, and print tensors in a more
// readable way.
auto output_tensor = output.toTensor();
auto data_output = output_tensor.const_data_ptr<float>();
for (size_t j = 0; j < output_list[i].toTensor().numel(); ++j) {
for (size_t j = 0; j < output_tensor.numel(); ++j) {
ET_LOG(Info, "%f", data_output[j]);
}
}
Expand Down
9 changes: 4 additions & 5 deletions exir/backend/test/demos/rpc/ExecutorBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,20 +77,19 @@ class ExecutorBackend final : public PyTorchBackendInterface {
runtime_allocator, MemoryAllocator);
new (client_const_allocator) MemoryAllocator(0, nullptr);

auto num_buffers = method_meta->num_non_const_buffers();
size_t num_non_const_buffers = num_buffers - 1;
auto num_non_const_buffers = method_meta->num_non_const_buffers();

uint8_t** non_const_buffers = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, uint8_t*, num_non_const_buffers);
MemoryAllocator* non_const_allocators = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, MemoryAllocator, num_non_const_buffers);

for (size_t id = 1; id < num_buffers; ++id) {
for (size_t id = 0; id < num_non_const_buffers; ++id) {
auto buffer_size = method_meta->non_const_buffer_size(id);
uint8_t* buffer_i = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, uint8_t, buffer_size.get());
non_const_buffers[id - 1] = buffer_i;
new (&non_const_allocators[id - 1])
non_const_buffers[id] = buffer_i;
new (&non_const_allocators[id])
MemoryAllocator(static_cast<uint32_t>(buffer_size.get()), buffer_i);
}

Expand Down
3 changes: 1 addition & 2 deletions extension/pybindings/pybindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ class Module final {
for (size_t i = 0; i < program_->num_methods(); ++i) {
auto name = program_->get_method_name(i).get();
auto method_meta = program_->method_meta(name).get();
// 1 on purpose because non-const are 1 indexed
for (size_t j = 1; j < method_meta.num_non_const_buffers(); j++) {
for (size_t j = 0; j < method_meta.num_non_const_buffers(); j++) {
int64_t buffer_size = method_meta.non_const_buffer_size(j).get();
if (non_const_buffer_sizes.find(j) == non_const_buffer_sizes.end()) {
non_const_buffer_sizes.insert({j, buffer_size});
Expand Down
9 changes: 7 additions & 2 deletions runtime/executor/method_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,10 @@ Result<TensorInfo> MethodMeta::output_tensor_meta(size_t index) const {
}

size_t MethodMeta::num_non_const_buffers() const {
return s_plan_->non_const_buffer_sizes()->size();
// Index zero is reserved internally, and we hide it from users. The actual
// number of buffers is one fewer than the size of this list in the
// program.
return s_plan_->non_const_buffer_sizes()->size() - 1;
}

Result<int64_t> MethodMeta::non_const_buffer_size(size_t index) const {
Expand All @@ -181,7 +184,9 @@ Result<int64_t> MethodMeta::non_const_buffer_size(size_t index) const {
"index %zu out of range. num_buffers: %zu",
index,
num_buffers);
return s_plan_->non_const_buffer_sizes()->Get(index);
// Index zero is reserved internally, and we hide it from users. Adjust the
// provided index to point to one of the actual buffers.
return s_plan_->non_const_buffer_sizes()->Get(index + 1);
}

} // namespace executor
Expand Down
6 changes: 3 additions & 3 deletions runtime/executor/test/method_meta_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,14 @@ TEST_F(MethodMetaTest, MethodMetaApi) {
EXPECT_EQ(method_meta->num_outputs(), 1);

// Appropriate amount of non_const_buffers
EXPECT_EQ(method_meta->num_non_const_buffers(), 2);
EXPECT_EQ(method_meta->num_non_const_buffers(), 1);

// Appropriate content of non_const_buffers
EXPECT_EQ(method_meta->non_const_buffer_size(1).get(), 48);
EXPECT_EQ(method_meta->non_const_buffer_size(0).get(), 48);

// Invalid index Errors
EXPECT_EQ(
method_meta->non_const_buffer_size(2).error(), Error::InvalidArgument);
method_meta->non_const_buffer_size(1).error(), Error::InvalidArgument);

EXPECT_EQ(
program_->method_meta("not_a_method").error(), Error::InvalidArgument);
Expand Down
42 changes: 14 additions & 28 deletions sdk/runners/executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,14 @@ int main(int argc, char** argv) {
}
ET_LOG(Info, "Running method %s", method_name);

// MethodMeta describes the memory requirements of the method.
Result<MethodMeta> method_meta = program->method_meta(method_name);
ET_CHECK_MSG(
method_meta.ok(),
"Failed to get method_meta for %s: 0x%x",
method_name,
(unsigned int)method_meta.error());

//
// The runtime does not use malloc/new; it allocates all memory using the
// MemoryManger provided by the client. Clients are responsible for allocating
Expand Down Expand Up @@ -265,35 +273,13 @@ int main(int argc, char** argv) {
// have more than one for, e.g., slow/large DRAM and fast/small SRAM.
std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers;
std::vector<MemoryAllocator> non_const_allocators;
size_t num_non_const_buffers = 0;
{
auto result = program->num_non_const_buffers(method_name);
ET_CHECK_MSG(
result.ok(),
"Failed to get number of non-const buffers for method %s: 0x%x",
method_name,
(unsigned int)result.error());
num_non_const_buffers = *result;
}
// Note that this loop starts at ID 1, because ID 0 is reserved. But, the
// HierarchicalAllocator indices are zero-based, so it's later adjusted by -1.
// TODO(T142455629): Make HierarchicalAllocator ID-based to avoid this
// memory_id-1.
for (size_t id = 1; id < num_non_const_buffers; ++id) {
auto buffer_size = program->get_non_const_buffer_size(id, method_name);
ET_CHECK_MSG(
buffer_size.ok(),
"Failed to get size of non-const buffer %zu for method %s: 0x%x",
id,
method_name,
(unsigned int)buffer_size.error());
ET_LOG(
Info, "Setting up non-const buffer %zu, size %zu.", id, *buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(*buffer_size));
// Since the list of allocators began empty, buffer ID N will live at index
// N-1.
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
for (size_t id = 0; id < num_non_const_buffers; ++id) {
size_t buffer_size = method_meta->non_const_buffer_size(id).get();
ET_LOG(Info, "Setting up non-const buffer %zu, size %zu.", id, buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
non_const_allocators.push_back(
MemoryAllocator(*buffer_size, non_const_buffers.back().get()));
MemoryAllocator(buffer_size, non_const_buffers.back().get()));
non_const_allocators.back().enable_profiling("non_const_allocators");
}
HierarchicalAllocator non_const_allocator(
Expand Down
35 changes: 17 additions & 18 deletions test/relocatable_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ using namespace torch::executor;
* power down and then back up) in between two inference requests.
*
* For ExecuTorch to work efficiently in these environments, we want to
* initialize the execution plan once once for the model and avoid
* re-initializing it for every inference. This can be achieved by restricting
* the runtime contexts (torch::executor::Program and torch::executor::Method)
* to live in a pre-allocated, shared, and persistent memory.
initialize the Method once for the model and avoid re-initializing it
* for every inference. This can be achieved by restricting the runtime contexts
* (torch::executor::Program and torch::executor::Method) to live in a
* pre-allocated, shared, and persistent memory.
*
* This tool demonstrates that the memory can be managed this way.
*/
Expand Down Expand Up @@ -79,8 +79,7 @@ Program* load_program(
}

MemoryManager* create_memory_manager(
Program* program,
const char* method_name,
MethodMeta* method_meta,
MemoryAllocator& worker_allocator) {
// Create the runtime allocator.
auto* runtime_allocator =
Expand All @@ -89,18 +88,16 @@ MemoryManager* create_memory_manager(
new (runtime_allocator) MemoryAllocator(sizeof(runtime_pool), runtime_pool);

// Create the non-const allocator and the buffers it points to.
size_t num_non_const_buffers =
program->num_non_const_buffers(method_name).get();
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
MemoryAllocator* non_const_allocators =
worker_allocator.allocateList<MemoryAllocator>(num_non_const_buffers - 1);
for (size_t id = 1; id < num_non_const_buffers; ++id) {
const size_t buffer_size =
program->get_non_const_buffer_size(id, method_name).get();
worker_allocator.allocateList<MemoryAllocator>(num_non_const_buffers);
for (size_t id = 0; id < num_non_const_buffers; ++id) {
const size_t buffer_size = method_meta->non_const_buffer_size(id).get();
ET_LOG(
Info, "Setting up non-const buffer id %zu, size %zu.", id, buffer_size);
void* buffer = worker_allocator.allocate(buffer_size);
ET_CHECK(buffer != nullptr);
new (&non_const_allocators[id - 1])
new (&non_const_allocators[id])
MemoryAllocator(buffer_size, (uint8_t*)buffer);
ET_LOG(
Info,
Expand All @@ -112,7 +109,7 @@ MemoryManager* create_memory_manager(
worker_allocator.allocateInstance<HierarchicalAllocator>();
ET_CHECK(non_const_allocator != nullptr);
new (non_const_allocator)
HierarchicalAllocator(num_non_const_buffers - 1, non_const_allocators);
HierarchicalAllocator(num_non_const_buffers, non_const_allocators);

// The constant allocator is not currently used, but must be provided.
auto* const_allocator = worker_allocator.allocateInstance<MemoryAllocator>();
Expand Down Expand Up @@ -140,8 +137,11 @@ Method* init_method(
MemoryAllocator& worker_allocator,
std::vector<size_t>& input_sizes,
std::vector<size_t>& output_sizes) {
Result<MethodMeta> method_meta = program->method_meta(method_name);
ET_CHECK(method_meta.ok());

MemoryManager* memory_manager =
create_memory_manager(program, method_name, worker_allocator);
create_memory_manager(&method_meta.get(), worker_allocator);

//
// Create and load a method from the program, using the provided
Expand Down Expand Up @@ -227,7 +227,7 @@ void inference_loop(
Error status = method->execute();
ET_CHECK_MSG(
status == Error::Ok,
"plan->execute() failed with status 0x%" PRIx32,
"method->execute() failed with status 0x%" PRIx32,
status);
ET_LOG(Info, "Model executed successfully.");
}
Expand Down Expand Up @@ -285,8 +285,7 @@ int main(int argc, char** argv) {
const char* method_name = nullptr;
{
// Use the first method in the program.
const size_t plan_index = 0;
const auto method_name_result = program->get_method_name(plan_index);
const auto method_name_result = program->get_method_name(0);
ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
method_name = *method_name_result;
}
Expand Down