Skip to content

Let Module use FileDataLoader when requested. #4174

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/extension-module.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ Use [ExecuTorch Dump](sdk-etdump.md) to trace model execution. Create an instanc

using namespace ::torch::executor;

Module module("/path/to/model.pte", Module::MlockConfig::UseMlock, std::make_unique<ETDumpGen>());
Module module("/path/to/model.pte", Module::LoadMode::MmapUseMlock, std::make_unique<ETDumpGen>());

// Execute a method, e.g. module.forward(...); or module.execute("my_method", ...);

Expand Down
11 changes: 6 additions & 5 deletions docs/source/llm/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@ penalties for repeated tokens, and biases to prioritize or de-prioritize specifi
```cpp
// main.cpp

using namespace torch::executor;

int main() {
// Set up the prompt. This provides the seed text for the model to elaborate.
std::cout << "Enter model prompt: ";
Expand All @@ -327,7 +329,7 @@ int main() {
BasicSampler sampler = BasicSampler();

// Load the exported nanoGPT program, which was generated via the previous steps.
Module model("nanogpt.pte", torch::executor::Module::MlockConfig::UseMlockIgnoreErrors);
Module model("nanogpt.pte", Module::LoadMode::MmapUseMlockIgnoreErrors);

const auto max_input_tokens = 1024;
const auto max_output_tokens = 30;
Expand Down Expand Up @@ -787,15 +789,14 @@ Include the ETDump header in your code.

Create an Instance of the ETDumpGen class and pass it to the Module constructor.
```cpp
std::unique_ptr<torch::executor::ETDumpGen> etdump_gen_ = std::make_unique<torch::executor::ETDumpGen>();
Module model("nanogpt.pte", torch::executor::Module::MlockConfig::UseMlockIgnoreErrors, std::move(etdump_gen_));
std::unique_ptr<ETDumpGen> etdump_gen_ = std::make_unique<ETDumpGen>();
Module model("nanogpt.pte", Module::LoadMode::MmapUseMlockIgnoreErrors, std::move(etdump_gen_));
```

After calling `generate()`, save the ETDump to a file. You can capture multiple
model runs in a single trace, if desired.
```cpp
torch::executor::ETDumpGen* etdump_gen =
static_cast<torch::executor::ETDumpGen*>(model.event_tracer());
ETDumpGen* etdump_gen = static_cast<ETDumpGen*>(model.event_tracer());

ET_LOG(Info, "ETDump size: %zu blocks", etdump_gen->get_num_blocks());
etdump_result result = etdump_gen->get_etdump_data();
Expand Down
4 changes: 1 addition & 3 deletions examples/llm_manual/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,7 @@ int main() {

// Load the exported nanoGPT program, which was generated via the previous
// steps.
Module model(
"nanogpt.pte",
torch::executor::Module::MlockConfig::UseMlockIgnoreErrors);
Module model("nanogpt.pte", Module::LoadMode::MmapUseMlockIgnoreErrors);

const auto max_input_tokens = 1024;
const auto max_output_tokens = 30;
Expand Down
2 changes: 1 addition & 1 deletion examples/models/phi-3-mini/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ int main() {

SentencePieceTokenizer tokenizer("tokenizer.model");

Module model("phi-3-mini.pte", Module::MlockConfig::UseMlockIgnoreErrors);
Module model("phi-3-mini.pte", Module::LoadMode::MmapUseMlockIgnoreErrors);

const auto max_output_tokens = 128;
generate(model, prompt, tokenizer, max_output_tokens);
Expand Down
4 changes: 2 additions & 2 deletions examples/qualcomm/llama2/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Runner::Runner(
const float temperature)
: module_(std::make_unique<Module>(
model_path,
Module::MlockConfig::UseMlockIgnoreErrors)),
Module::LoadMode::MmapUseMlockIgnoreErrors)),
tokenizer_path_(tokenizer_path),
model_path_(model_path),
temperature_(temperature) {
Expand Down Expand Up @@ -649,7 +649,7 @@ Error Runner::mem_alloc(size_t alignment, size_t seq_len) {
// Reset and re-init again to trigger registered function
module_.reset();
module_ = std::make_unique<Module>(
model_path_, Module::MlockConfig::UseMlockIgnoreErrors),
model_path_, Module::LoadMode::MmapUseMlockIgnoreErrors),
ET_CHECK_MSG(load() == Error::Ok, "Runner failed to load method");

return Error::Ok;
Expand Down
7 changes: 3 additions & 4 deletions extension/android/jni/jni_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ class JEValue : public facebook::jni::JavaClass<JEValue> {
class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
private:
friend HybridBase;
std::unique_ptr<torch::executor::Module> module_;
std::unique_ptr<Module> module_;

public:
constexpr static auto kJavaDescriptor = "Lorg/pytorch/executorch/NativePeer;";
Expand All @@ -252,9 +252,8 @@ class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
facebook::jni::alias_ref<
facebook::jni::JMap<facebook::jni::JString, facebook::jni::JString>>
extraFiles) {
module_ = std::make_unique<torch::executor::Module>(
modelPath->toStdString(),
torch::executor::Module::MlockConfig::NoMlock);
module_ = std::make_unique<Module>(
modelPath->toStdString(), Module::LoadMode::Mmap);
}

facebook::jni::local_ref<facebook::jni::JArrayClass<JEValue>> forward(
Expand Down
44 changes: 25 additions & 19 deletions extension/module/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <executorch/extension/module/module.h>

#include <executorch/extension/data_loader/file_data_loader.h>
#include <executorch/extension/data_loader/mmap_data_loader.h>
#include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
#include <executorch/runtime/platform/runtime.h>
Expand Down Expand Up @@ -36,10 +37,10 @@ namespace torch::executor {

Module::Module(
const std::string& file_path,
const Module::MlockConfig mlock_config,
const Module::LoadMode load_mode,
std::unique_ptr<EventTracer> event_tracer)
: file_path_(file_path),
mlock_config_(mlock_config),
load_mode_(load_mode),
memory_allocator_(std::make_unique<util::MallocMemoryAllocator>()),
temp_allocator_(std::make_unique<util::MallocMemoryAllocator>()),
event_tracer_(std::move(event_tracer)) {
Expand All @@ -49,36 +50,41 @@ Module::Module(
Module::Module(
std::unique_ptr<DataLoader> data_loader,
std::unique_ptr<MemoryAllocator> memory_allocator,
std::unique_ptr<MemoryAllocator> tmp_memory_allocator,
std::unique_ptr<MemoryAllocator> temp_allocator,
std::unique_ptr<EventTracer> event_tracer)
: data_loader_(std::move(data_loader)),
memory_allocator_(
memory_allocator ? std::move(memory_allocator)
: std::make_unique<util::MallocMemoryAllocator>()),
temp_allocator_(

tmp_memory_allocator
? std::move(tmp_memory_allocator)
: std::make_unique<util::MallocMemoryAllocator>()),
temp_allocator ? std::move(temp_allocator)
: std::make_unique<util::MallocMemoryAllocator>()),
event_tracer_(std::move(event_tracer)) {
runtime_init();
}

Error Module::load(const Program::Verification verification) {
if (!is_loaded()) {
if (!data_loader_) {
data_loader_ = ET_UNWRAP_UNIQUE(
util::MmapDataLoader::from(file_path_.c_str(), [this] {
switch (mlock_config_) {
case MlockConfig::NoMlock:
return util::MmapDataLoader::MlockConfig::NoMlock;
case MlockConfig::UseMlock:
return util::MmapDataLoader::MlockConfig::UseMlock;
case MlockConfig::UseMlockIgnoreErrors:
return util::MmapDataLoader::MlockConfig::UseMlockIgnoreErrors;
}
ET_ASSERT_UNREACHABLE();
}()));
switch (load_mode_) {
case LoadMode::File:
data_loader_ =
ET_UNWRAP_UNIQUE(util::FileDataLoader::from(file_path_.c_str()));
break;
case LoadMode::Mmap:
data_loader_ =
ET_UNWRAP_UNIQUE(util::MmapDataLoader::from(file_path_.c_str()));
break;
case LoadMode::MmapUseMlock:
data_loader_ = ET_UNWRAP_UNIQUE(util::MmapDataLoader::from(
file_path_.c_str(), util::MmapDataLoader::MlockConfig::NoMlock));
break;
case LoadMode::MmapUseMlockIgnoreErrors:
data_loader_ = ET_UNWRAP_UNIQUE(util::MmapDataLoader::from(
file_path_.c_str(),
util::MmapDataLoader::MlockConfig::UseMlockIgnoreErrors));
break;
}
};
program_ =
ET_UNWRAP_UNIQUE(Program::load(data_loader_.get(), verification));
Expand Down
26 changes: 14 additions & 12 deletions extension/module/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,42 +24,44 @@ namespace torch::executor {
class Module final {
public:
/**
* Enum to define memory locking behavior.
* Enum to define loading behavior.
*/
enum class MlockConfig {
/// Do not use memory locking.
NoMlock,
enum class LoadMode {
/// Load the whole file as a buffer.
File,
/// Use mmap to load pages into memory.
Mmap,
/// Use memory locking and handle errors.
UseMlock,
MmapUseMlock,
/// Use memory locking and ignore errors.
UseMlockIgnoreErrors,
MmapUseMlockIgnoreErrors,
};

/**
* Constructs an instance by loading a program from a file with specified
* memory locking behavior.
*
* @param[in] file_path The path to the ExecuTorch program file to load.
* @param[in] mlock_config The memory locking configuration to use.
* @param[in] load_mode The loading mode to use.
*/
explicit Module(
const std::string& file_path,
const MlockConfig mlock_config = MlockConfig::UseMlock,
const LoadMode load_mode = LoadMode::MmapUseMlock,
std::unique_ptr<EventTracer> event_tracer = nullptr);

/**
* Constructs an instance with the provided data loader and memory allocator.
*
* @param[in] data_loader A DataLoader used for loading program data.
* @param[in] memory_allocator A MemoryAllocator used for memory management.
* @param[in] tmp_memory_allocator A MemoryAllocator used for allocating
* memory during execution time.
* @param[in] temp_allocator A MemoryAllocator to use when allocating
* temporary data during kernel or delegate execution.
* @param[in] event_tracer A EventTracer used for tracking and logging events.
*/
explicit Module(
std::unique_ptr<DataLoader> data_loader,
std::unique_ptr<MemoryAllocator> memory_allocator = nullptr,
std::unique_ptr<MemoryAllocator> tmp_memory_allocator = nullptr,
std::unique_ptr<MemoryAllocator> temp_allocator = nullptr,
std::unique_ptr<EventTracer> event_tracer = nullptr);
Module(const Module&) = delete;
Module& operator=(const Module&) = delete;
Expand Down Expand Up @@ -215,7 +217,7 @@ class Module final {

private:
std::string file_path_;
MlockConfig mlock_config_{MlockConfig::NoMlock};
LoadMode load_mode_{LoadMode::MmapUseMlock};
std::unique_ptr<DataLoader> data_loader_;
std::unique_ptr<MemoryAllocator> memory_allocator_;
std::unique_ptr<MemoryAllocator> temp_allocator_;
Expand Down
1 change: 1 addition & 0 deletions extension/module/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def define_common_targets():
],
deps = [
"//executorch/extension/memory_allocator:malloc_memory_allocator",
"//executorch/extension/data_loader:file_data_loader",
"//executorch/extension/data_loader:mmap_data_loader",
],
exported_deps = [
Expand Down
Loading