Skip to content

[executorch] Migrate runner-like targets to use the new MemoryManager API #403

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 31 additions & 48 deletions examples/bundled_executor_runner/bundled_executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@
#include <executorch/util/bundled_program_verification.h>
#include <executorch/util/util.h>

static constexpr size_t kRuntimeMemorySize = 4 * 1024U * 1024U; // 4 MB
static uint8_t runtime_pool[kRuntimeMemorySize];
static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4MB
static constexpr size_t kBundledAllocatorPoolSize = 16 * 1024U;
static uint8_t bundled_allocator_pool[kBundledAllocatorPoolSize];

Expand Down Expand Up @@ -138,70 +137,52 @@ int main(int argc, char** argv) {
// do it dynamically.
//

// The runtime allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded program. This allocator is only used during
// The method allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded method. This allocator is only used during
// loading a method of the program, which will return an error if there was
// not enough memory.
//
// The amount of memory required depends on the loaded program and the runtime
// The amount of memory required depends on the loaded method and the runtime
// code itself. The amount of memory here is usually determined by running the
// program and seeing how much memory is actually used, though it's possible
// to subclass MemoryAllocator so that it calls malloc() under the hood.

// In this example we using statically allocated gloabl runtime_pool of
// size kRuntimeMemorySize
MemoryAllocator runtime_allocator{
MemoryAllocator(kRuntimeMemorySize, runtime_pool)};
runtime_allocator.enable_profiling("runtime allocator");
// method and seeing how much memory is actually used, though it's possible to
// subclass MemoryAllocator so that it calls malloc() under the hood (see
// MallocMemoryAllocator).
//
// In this example we use a statically allocated memory pool.
MemoryAllocator method_allocator{
MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)};
method_allocator.enable_profiling("method allocator");

// The non-const buffers will back the mutable tensors used by the method. The
// sizes of these buffers were determined ahead of time during the
// The memory-planned buffers will back the mutable tensors used by the
// method. The sizes of these buffers were determined ahead of time during the
// memory-planning passes.
//
// Each buffer typically corresponds to a different hardware memory bank. Most
// mobile environments will only have a single buffer. Some embedded
// environments may have more than one for, e.g., slow/large DRAM and
// fast/small SRAM, or for memory associated with particular cores.
std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers;
std::vector<Span<uint8_t>> non_const_spans;
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
for (size_t id = 0; id < num_non_const_buffers; ++id) {
// .get() will always succeed because id < num_non_const_buffers.
std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
// .get() will always succeed because id < num_memory_planned_buffers.
size_t buffer_size =
static_cast<size_t>(method_meta->non_const_buffer_size(id).get());
ET_LOG(Info, "Setting up non-const buffer %zu, size %zu.", id, buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
non_const_spans.push_back({non_const_buffers.back().get(), buffer_size});
static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
planned_spans.push_back({planned_buffers.back().get(), buffer_size});
}
HierarchicalAllocator non_const_allocator(
{non_const_spans.data(), non_const_spans.size()});

// Allocator for bundled input.
MemoryAllocator bundled_input_allocator{
MemoryAllocator(kBundledAllocatorPoolSize, bundled_allocator_pool)};

// The constant allocator is not currently used. Please initialize with a
// zero-sized allocator.
MemoryAllocator const_allocator{MemoryAllocator(0, nullptr)};
const_allocator.enable_profiling("const allocator");

// The kernel temporary allocator is not currently used. Please initialize
// with a zero-sized allocator.
MemoryAllocator temp_allocator{MemoryAllocator(0, nullptr)};
temp_allocator.enable_profiling("temp allocator");
HierarchicalAllocator planned_memory(
{planned_spans.data(), planned_spans.size()});

// Assemble all of the allocators into the MemoryManager that the Executor
// will use.
MemoryManager memory_manager(
&const_allocator,
&non_const_allocator,
&runtime_allocator,
&temp_allocator);
MemoryManager memory_manager(&method_allocator, &planned_memory);

//
// Load method from the program, using the provided
// allocators. Running the method can mutate allocated non_const buffers,
// so should only be used by a single thread at at time, but it can be reused.
// Load the method from the program, using the provided allocators. Running
// the method can mutate the memory-planned buffers, so the method should only
// be used by a single thread at a time, but it can be reused.
//

Result<Method> method = program->load_method(method_name, &memory_manager);
Expand All @@ -214,6 +195,8 @@ int main(int argc, char** argv) {

// Prepare the inputs.
// Use ones-initialized inputs or bundled inputs.
MemoryAllocator bundled_input_allocator{
MemoryAllocator(kBundledAllocatorPoolSize, bundled_allocator_pool)};
exec_aten::ArrayRef<void*> inputs;
if (FLAGS_bundled_program) {
// Use the inputs embedded in the bundled program.
Expand Down
73 changes: 29 additions & 44 deletions examples/executor_runner/executor_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@
#include <executorch/runtime/platform/runtime.h>
#include <executorch/util/util.h>

static constexpr size_t kRuntimeMemorySize = 4 * 1024U * 1024U; // 4 MB
static uint8_t runtime_pool[kRuntimeMemorySize];
static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB

DEFINE_string(
model_path,
Expand Down Expand Up @@ -98,66 +97,52 @@ int main(int argc, char** argv) {
// do it dynamically.
//

// The runtime allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded program. This allocator is only used during
// The method allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded method. This allocator is only used during
// loading a method of the program, which will return an error if there was
// not enough memory.
//
// The amount of memory required depends on the loaded program and the runtime
// The amount of memory required depends on the loaded method and the runtime
// code itself. The amount of memory here is usually determined by running the
// program and seeing how much memory is actually used, though it's possible
// to subclass MemoryAllocator so that it calls malloc() under the hood.

// In this example we using statically allocated gloabl runtime_pool of
// size kRuntimeMemorySize
MemoryAllocator runtime_allocator{
MemoryAllocator(kRuntimeMemorySize, runtime_pool)};
runtime_allocator.enable_profiling("runtime allocator");
// method and seeing how much memory is actually used, though it's possible to
// subclass MemoryAllocator so that it calls malloc() under the hood (see
// MallocMemoryAllocator).
//
// In this example we use a statically allocated memory pool.
MemoryAllocator method_allocator{
MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)};
method_allocator.enable_profiling("method allocator");

// The non-const buffers will back the mutable tensors used by the method. The
// sizes of these buffers were determined ahead of time during the
// The memory-planned buffers will back the mutable tensors used by the
// method. The sizes of these buffers were determined ahead of time during the
// memory-planning passes.
//
// Each buffer typically corresponds to a different hardware memory bank. Most
// mobile environments will only have a single buffer. Some embedded
// environments may have more than one for, e.g., slow/large DRAM and
// fast/small SRAM, or for memory associated with particular cores.
std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers;
std::vector<Span<uint8_t>> non_const_spans;
size_t num_non_const_buffers = method_meta->num_non_const_buffers();
for (size_t id = 0; id < num_non_const_buffers; ++id) {
// .get() will always succeed because id < num_non_const_buffers.
std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
// .get() will always succeed because id < num_memory_planned_buffers.
size_t buffer_size =
static_cast<size_t>(method_meta->non_const_buffer_size(id).get());
ET_LOG(Info, "Setting up non-const buffer %zu, size %zu.", id, buffer_size);
non_const_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
non_const_spans.push_back({non_const_buffers.back().get(), buffer_size});
static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
planned_spans.push_back({planned_buffers.back().get(), buffer_size});
}
HierarchicalAllocator non_const_allocator(
{non_const_spans.data(), non_const_spans.size()});

// The constant allocator is not currently used. Please initialize with a
// zero-sized allocator.
MemoryAllocator const_allocator{MemoryAllocator(0, nullptr)};
const_allocator.enable_profiling("const allocator");

// The kernel temporary allocator is not currently used. Please initialize
// with a zero-sized allocator.
MemoryAllocator temp_allocator{MemoryAllocator(0, nullptr)};
temp_allocator.enable_profiling("temp allocator");
HierarchicalAllocator planned_memory(
{planned_spans.data(), planned_spans.size()});

// Assemble all of the allocators into the MemoryManager that the Executor
// will use.
MemoryManager memory_manager(
&const_allocator,
&non_const_allocator,
&runtime_allocator,
&temp_allocator);
MemoryManager memory_manager(&method_allocator, &planned_memory);

//
// Load method from the program, using the provided
// allocators. Running the method can mutate allocated non_const buffers,
// so should only be used by a single thread at at time, but it can be reused.
// Load the method from the program, using the provided allocators. Running
// the method can mutate the memory-planned buffers, so the method should only
// be used by a single thread at a time, but it can be reused.
//

Result<Method> method = program->load_method(method_name, &memory_manager);
Expand Down
39 changes: 14 additions & 25 deletions exir/backend/test/demos/rpc/ExecutorBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,50 +73,39 @@ class ExecutorBackend final : public PyTorchBackendInterface {
}

// Building all different allocators for the client executor
auto client_const_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
runtime_allocator, MemoryAllocator);
new (client_const_allocator) MemoryAllocator(0, nullptr);

auto num_non_const_buffers = method_meta->num_non_const_buffers();
auto num_memory_planned_buffers = method_meta->num_memory_planned_buffers();

Span<uint8_t>* non_const_buffers = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, Span<uint8_t>, num_non_const_buffers);
Span<uint8_t>* memory_planned_buffers = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, Span<uint8_t>, num_memory_planned_buffers);

for (size_t id = 0; id < num_non_const_buffers; ++id) {
size_t buffer_size =
static_cast<size_t>(method_meta->non_const_buffer_size(id).get());
for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
size_t buffer_size = static_cast<size_t>(
method_meta->memory_planned_buffer_size(id).get());
uint8_t* buffer_i = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
runtime_allocator, uint8_t, buffer_size);
non_const_buffers[id] = {buffer_i, buffer_size};
memory_planned_buffers[id] = {buffer_i, buffer_size};
}

auto client_non_const_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
auto client_planned_memory = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
runtime_allocator, HierarchicalAllocator);
new (client_non_const_allocator)
HierarchicalAllocator({non_const_buffers, num_non_const_buffers});
new (client_planned_memory) HierarchicalAllocator(
{memory_planned_buffers, num_memory_planned_buffers});

// Allocate some memory from runtime allocator for the client executor, in
// real case, like if it's an executor in dsp, it should allocate memory
// dedicated to this specific hardware
auto client_runtime_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
auto client_method_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
runtime_allocator, MemoryAllocator);
const size_t kClientRuntimeMemorySize = 4 * 1024U;
auto runtime_pool = ET_ALLOCATE_OR_RETURN_ERROR(
runtime_allocator, kClientRuntimeMemorySize);
new (client_runtime_allocator) MemoryAllocator(
new (client_method_allocator) MemoryAllocator(
kClientRuntimeMemorySize, static_cast<uint8_t*>(runtime_pool));

auto client_temp_allocator = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
runtime_allocator, MemoryAllocator);
new (client_temp_allocator) MemoryAllocator(0, nullptr);

auto client_memory_manager =
ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(runtime_allocator, MemoryManager);
new (client_memory_manager) MemoryManager(
client_const_allocator,
client_non_const_allocator,
client_runtime_allocator,
client_temp_allocator);
new (client_memory_manager)
MemoryManager(client_method_allocator, client_planned_memory);

// Construct the client Method
Result<Method> method_res =
Expand Down
Loading