Skip to content

Make sdk/executor_runner use MethodMeta interface and adjust non-const indices #317

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 22 additions & 30 deletions examples/executor_runner/executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,21 @@ int main(int argc, char** argv) {
ET_LOG(Info, "Model file %s is loaded.", model_path);

// Use the first method in the program.
const size_t plan_index = 0;
const char* method_name = nullptr;
{
const auto method_name_result = program->get_method_name(plan_index);
const auto method_name_result = program->get_method_name(0);
ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
method_name = *method_name_result;
}
ET_LOG(Info, "Running method %s", method_name);
ET_LOG(Info, "Using method %s", method_name);

// MethodMeta describes the memory requirements of the method.
Result<MethodMeta> method_meta = program->method_meta(method_name);
ET_CHECK_MSG(
method_meta.ok(),
"Failed to get method_meta for %s: 0x%x",
method_name,
(unsigned int)method_meta.error());

//
// The runtime does not use malloc/new; it allocates all memory using the
Expand Down Expand Up @@ -116,20 +123,9 @@ int main(int argc, char** argv) {
// have more than one for, e.g., slow/large DRAM and fast/small SRAM.
std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers;
std::vector<MemoryAllocator> non_const_allocators;
size_t num_non_const_buffers = 0;
{
auto result = program->num_non_const_buffers(method_name);
ET_CHECK_MSG(
result.ok(),
"Failed to get number of non-const buffers for method %s: 0x%x",
method_name,
(unsigned int)result.error());
num_non_const_buffers = *result;
}
// Note that this loop starts at ID 1, because ID 0 is reserved. But, the
// HierarchicalAllocator indices are zero-based, so it's later adjusted by -1.
for (size_t id = 1; id < num_non_const_buffers; ++id) {
auto buffer_size = program->get_non_const_buffer_size(id, method_name);
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
for (size_t id = 0; id < num_non_const_buffers; ++id) {
auto buffer_size = method_meta->non_const_buffer_size(id);
ET_CHECK_MSG(
buffer_size.ok(),
"Failed to get size of non-const buffer %zu for method %s: 0x%x",
Expand All @@ -139,8 +135,6 @@ int main(int argc, char** argv) {
ET_LOG(
Info, "Setting up non-const buffer %zu, size %zu.", id, *buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(*buffer_size));
// Since the list of allocators began empty, buffer ID N will live at index
// N-1.
non_const_allocators.push_back(
MemoryAllocator(*buffer_size, non_const_buffers.back().get()));
non_const_allocators.back().enable_profiling("non_const_allocators");
Expand Down Expand Up @@ -194,19 +188,17 @@ int main(int argc, char** argv) {
status);
ET_LOG(Info, "Model executed successfully.");

auto output_list =
runtime_allocator.allocateList<EValue>(method->outputs_size());
status = method->get_outputs(output_list, method->outputs_size());
// Print the outputs.
std::vector<EValue> outputs(method->outputs_size());
status = method->get_outputs(outputs.data(), outputs.size());
ET_CHECK(status == Error::Ok);
// The following code assumes all output EValues are floating point
// tensors. We need to handle other types of EValues and tensor
// dtypes. Furthermore, we need a util to print tensors in a more
// interpretable (e.g. size, dtype) and readable way.
// TODO for the above at T159700776
for (size_t i = 0; i < method->outputs_size(); i++) {
auto output_tensor = output_list[i].toTensor();
for (EValue& output : outputs) {
// TODO(T159700776): This assumes that all outputs are fp32 tensors. Add
// support for other EValues and Tensor dtypes, and print tensors in a more
// readable way.
auto output_tensor = output.toTensor();
auto data_output = output_tensor.const_data_ptr<float>();
for (size_t j = 0; j < output_list[i].toTensor().numel(); ++j) {
for (size_t j = 0; j < output_tensor.numel(); ++j) {
ET_LOG(Info, "%f", data_output[j]);
}
}
Expand Down
9 changes: 4 additions & 5 deletions exir/backend/test/demos/rpc/ExecutorBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,20 +77,19 @@ class ExecutorBackend final : public PyTorchBackendInterface {
runtime_allocator, MemoryAllocator);
new (client_const_allocator) MemoryAllocator(0, nullptr);

auto num_buffers = method_meta->num_non_const_buffers();
size_t num_non_const_buffers = num_buffers - 1;
auto num_non_const_buffers = method_meta->num_non_const_buffers();

uint8_t** non_const_buffers = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, uint8_t*, num_non_const_buffers);
MemoryAllocator* non_const_allocators = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, MemoryAllocator, num_non_const_buffers);

for (size_t id = 1; id < num_buffers; ++id) {
for (size_t id = 0; id < num_non_const_buffers; ++id) {
auto buffer_size = method_meta->non_const_buffer_size(id);
uint8_t* buffer_i = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, uint8_t, buffer_size.get());
non_const_buffers[id - 1] = buffer_i;
new (&non_const_allocators[id - 1])
non_const_buffers[id] = buffer_i;
new (&non_const_allocators[id])
MemoryAllocator(static_cast<uint32_t>(buffer_size.get()), buffer_i);
}

Expand Down
3 changes: 1 addition & 2 deletions extension/pybindings/pybindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ class Module final {
for (size_t i = 0; i < program_->num_methods(); ++i) {
auto name = program_->get_method_name(i).get();
auto method_meta = program_->method_meta(name).get();
// 1 on purpose because non-const are 1 indexed
for (size_t j = 1; j < method_meta.num_non_const_buffers(); j++) {
for (size_t j = 0; j < method_meta.num_non_const_buffers(); j++) {
int64_t buffer_size = method_meta.non_const_buffer_size(j).get();
if (non_const_buffer_sizes.find(j) == non_const_buffer_sizes.end()) {
non_const_buffer_sizes.insert({j, buffer_size});
Expand Down
9 changes: 7 additions & 2 deletions runtime/executor/method_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,10 @@ Result<TensorInfo> MethodMeta::output_tensor_meta(size_t index) const {
}

size_t MethodMeta::num_non_const_buffers() const {
return s_plan_->non_const_buffer_sizes()->size();
// Index zero is reserved internally, and we hide it from users. The actual
// number of buffers is one fewer than the size of this list in the
// program.
return s_plan_->non_const_buffer_sizes()->size() - 1;
}

Result<int64_t> MethodMeta::non_const_buffer_size(size_t index) const {
Expand All @@ -181,7 +184,9 @@ Result<int64_t> MethodMeta::non_const_buffer_size(size_t index) const {
"index %zu out of range. num_buffers: %zu",
index,
num_buffers);
return s_plan_->non_const_buffer_sizes()->Get(index);
// Index zero is reserved internally, and we hide it from users. Adjust the
// provided index to point to one of the actual buffers.
return s_plan_->non_const_buffer_sizes()->Get(index + 1);
}

} // namespace executor
Expand Down
6 changes: 3 additions & 3 deletions runtime/executor/test/method_meta_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,14 @@ TEST_F(MethodMetaTest, MethodMetaApi) {
EXPECT_EQ(method_meta->num_outputs(), 1);

// Appropriate amount of non_const_buffers
EXPECT_EQ(method_meta->num_non_const_buffers(), 2);
EXPECT_EQ(method_meta->num_non_const_buffers(), 1);

// Appropriate content of non_const_buffers
EXPECT_EQ(method_meta->non_const_buffer_size(1).get(), 48);
EXPECT_EQ(method_meta->non_const_buffer_size(0).get(), 48);

// Invalid index Errors
EXPECT_EQ(
method_meta->non_const_buffer_size(2).error(), Error::InvalidArgument);
method_meta->non_const_buffer_size(1).error(), Error::InvalidArgument);

EXPECT_EQ(
program_->method_meta("not_a_method").error(), Error::InvalidArgument);
Expand Down
42 changes: 14 additions & 28 deletions sdk/runners/executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,14 @@ int main(int argc, char** argv) {
}
ET_LOG(Info, "Running method %s", method_name);

// MethodMeta describes the memory requirements of the method.
Result<MethodMeta> method_meta = program->method_meta(method_name);
ET_CHECK_MSG(
method_meta.ok(),
"Failed to get method_meta for %s: 0x%x",
method_name,
(unsigned int)method_meta.error());

//
// The runtime does not use malloc/new; it allocates all memory using the
// MemoryManger provided by the client. Clients are responsible for allocating
Expand Down Expand Up @@ -265,35 +273,13 @@ int main(int argc, char** argv) {
// have more than one for, e.g., slow/large DRAM and fast/small SRAM.
std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers;
std::vector<MemoryAllocator> non_const_allocators;
size_t num_non_const_buffers = 0;
{
auto result = program->num_non_const_buffers(method_name);
ET_CHECK_MSG(
result.ok(),
"Failed to get number of non-const buffers for method %s: 0x%x",
method_name,
(unsigned int)result.error());
num_non_const_buffers = *result;
}
// Note that this loop starts at ID 1, because ID 0 is reserved. But, the
// HierarchicalAllocator indices are zero-based, so it's later adjusted by -1.
// TODO(T142455629): Make HierarchicalAllocator ID-based to avoid this
// memory_id-1.
for (size_t id = 1; id < num_non_const_buffers; ++id) {
auto buffer_size = program->get_non_const_buffer_size(id, method_name);
ET_CHECK_MSG(
buffer_size.ok(),
"Failed to get size of non-const buffer %zu for method %s: 0x%x",
id,
method_name,
(unsigned int)buffer_size.error());
ET_LOG(
Info, "Setting up non-const buffer %zu, size %zu.", id, *buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(*buffer_size));
// Since the list of allocators began empty, buffer ID N will live at index
// N-1.
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
for (size_t id = 0; id < num_non_const_buffers; ++id) {
size_t buffer_size = method_meta->non_const_buffer_size(id).get();
ET_LOG(Info, "Setting up non-const buffer %zu, size %zu.", id, buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
non_const_allocators.push_back(
MemoryAllocator(*buffer_size, non_const_buffers.back().get()));
MemoryAllocator(buffer_size, non_const_buffers.back().get()));
non_const_allocators.back().enable_profiling("non_const_allocators");
}
HierarchicalAllocator non_const_allocator(
Expand Down
35 changes: 17 additions & 18 deletions test/relocatable_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ using namespace torch::executor;
* power down and then back up) in between two inference requests.
*
* For ExecuTorch to work efficiently in these environments, we want to
* initialize the execution plan once once for the model and avoid
* re-initializing it for every inference. This can be achieved by restricting
* the runtime contexts (torch::executor::Program and torch::executor::Method)
* to live in a pre-allocated, shared, and persistent memory.
initialize the Method once for the model and avoid re-initializing it
* for every inference. This can be achieved by restricting the runtime contexts
* (torch::executor::Program and torch::executor::Method) to live in a
* pre-allocated, shared, and persistent memory.
*
* This tool demonstrates that the memory can be managed this way.
*/
Expand Down Expand Up @@ -79,8 +79,7 @@ Program* load_program(
}

MemoryManager* create_memory_manager(
Program* program,
const char* method_name,
MethodMeta* method_meta,
MemoryAllocator& worker_allocator) {
// Create the runtime allocator.
auto* runtime_allocator =
Expand All @@ -89,18 +88,16 @@ MemoryManager* create_memory_manager(
new (runtime_allocator) MemoryAllocator(sizeof(runtime_pool), runtime_pool);

// Create the non-const allocator and the buffers it points to.
size_t num_non_const_buffers =
program->num_non_const_buffers(method_name).get();
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
MemoryAllocator* non_const_allocators =
worker_allocator.allocateList<MemoryAllocator>(num_non_const_buffers - 1);
for (size_t id = 1; id < num_non_const_buffers; ++id) {
const size_t buffer_size =
program->get_non_const_buffer_size(id, method_name).get();
worker_allocator.allocateList<MemoryAllocator>(num_non_const_buffers);
for (size_t id = 0; id < num_non_const_buffers; ++id) {
const size_t buffer_size = method_meta->non_const_buffer_size(id).get();
ET_LOG(
Info, "Setting up non-const buffer id %zu, size %zu.", id, buffer_size);
void* buffer = worker_allocator.allocate(buffer_size);
ET_CHECK(buffer != nullptr);
new (&non_const_allocators[id - 1])
new (&non_const_allocators[id])
MemoryAllocator(buffer_size, (uint8_t*)buffer);
ET_LOG(
Info,
Expand All @@ -112,7 +109,7 @@ MemoryManager* create_memory_manager(
worker_allocator.allocateInstance<HierarchicalAllocator>();
ET_CHECK(non_const_allocator != nullptr);
new (non_const_allocator)
HierarchicalAllocator(num_non_const_buffers - 1, non_const_allocators);
HierarchicalAllocator(num_non_const_buffers, non_const_allocators);

// The constant allocator is not currently used, but must be provided.
auto* const_allocator = worker_allocator.allocateInstance<MemoryAllocator>();
Expand Down Expand Up @@ -140,8 +137,11 @@ Method* init_method(
MemoryAllocator& worker_allocator,
std::vector<size_t>& input_sizes,
std::vector<size_t>& output_sizes) {
Result<MethodMeta> method_meta = program->method_meta(method_name);
ET_CHECK(method_meta.ok());

MemoryManager* memory_manager =
create_memory_manager(program, method_name, worker_allocator);
create_memory_manager(&method_meta.get(), worker_allocator);

//
// Create and load a method from the program, using the provided
Expand Down Expand Up @@ -227,7 +227,7 @@ void inference_loop(
Error status = method->execute();
ET_CHECK_MSG(
status == Error::Ok,
"plan->execute() failed with status 0x%" PRIx32,
"method->execute() failed with status 0x%" PRIx32,
status);
ET_LOG(Info, "Model executed successfully.");
}
Expand Down Expand Up @@ -285,8 +285,7 @@ int main(int argc, char** argv) {
const char* method_name = nullptr;
{
// Use the first method in the program.
const size_t plan_index = 0;
const auto method_name_result = program->get_method_name(plan_index);
const auto method_name_result = program->get_method_name(0);
ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
method_name = *method_name_result;
}
Expand Down