Let Module use FileDataLoader when requested. (#4174)

shoumikhin · facebook-github-bot · commit 50e94c77c861 · 2024-07-10T09:59:08.000-07:00
Summary: Pull Request resolved: #4174 Hide more data loader options behind the facade. Reviewed By: kirklandsign Differential Revision: D59498348
diff --git a/docs/source/extension-module.md b/docs/source/extension-module.md
@@ -136,7 +136,7 @@ Use [ExecuTorch Dump](sdk-etdump.md) to trace model execution. Create an instanc
 
 using namespace ::torch::executor;
 
-Module module("/path/to/model.pte", Module::MlockConfig::UseMlock, std::make_unique<ETDumpGen>());
+Module module("/path/to/model.pte", Module::LoadMode::MmapUseMlock, std::make_unique<ETDumpGen>());
 
 // Execute a method, e.g. module.forward(...); or module.execute("my_method", ...);
 
diff --git a/docs/source/llm/getting-started.md b/docs/source/llm/getting-started.md
@@ -313,6 +313,8 @@ penalties for repeated tokens, and biases to prioritize or de-prioritize specifi
 ```cpp
 // main.cpp
 
+using namespace torch::executor;
+
 int main() {
     // Set up the prompt. This provides the seed text for the model to elaborate.
     std::cout << "Enter model prompt: ";
@@ -327,7 +329,7 @@ int main() {
     BasicSampler sampler = BasicSampler();
 
     // Load the exported nanoGPT program, which was generated via the previous steps.
-    Module model("nanogpt.pte", torch::executor::Module::MlockConfig::UseMlockIgnoreErrors);
+    Module model("nanogpt.pte", Module::LoadMode::MmapUseMlockIgnoreErrors);
 
     const auto max_input_tokens = 1024;
     const auto max_output_tokens = 30;
@@ -787,15 +789,14 @@ Include the ETDump header in your code.
 
 Create an Instance of the ETDumpGen class and pass it to the Module constructor.
 ```cpp
-std::unique_ptr<torch::executor::ETDumpGen> etdump_gen_ = std::make_unique<torch::executor::ETDumpGen>();
-Module model("nanogpt.pte", torch::executor::Module::MlockConfig::UseMlockIgnoreErrors, std::move(etdump_gen_));
+std::unique_ptr<ETDumpGen> etdump_gen_ = std::make_unique<ETDumpGen>();
+Module model("nanogpt.pte", Module::LoadMode::MmapUseMlockIgnoreErrors, std::move(etdump_gen_));
 ```
 
 After calling `generate()`, save the ETDump to a file. You can capture multiple
 model runs in a single trace, if desired.
 ```cpp
-torch::executor::ETDumpGen* etdump_gen =
-    static_cast<torch::executor::ETDumpGen*>(model.event_tracer());
+ETDumpGen* etdump_gen = static_cast<ETDumpGen*>(model.event_tracer());
 
 ET_LOG(Info, "ETDump size: %zu blocks", etdump_gen->get_num_blocks());
 etdump_result result = etdump_gen->get_etdump_data();
diff --git a/examples/llm_manual/main.cpp b/examples/llm_manual/main.cpp
@@ -110,9 +110,7 @@ int main() {
 
   // Load the exported nanoGPT program, which was generated via the previous
   // steps.
-  Module model(
-      "nanogpt.pte",
-      torch::executor::Module::MlockConfig::UseMlockIgnoreErrors);
+  Module model("nanogpt.pte", Module::LoadMode::MmapUseMlockIgnoreErrors);
 
   const auto max_input_tokens = 1024;
   const auto max_output_tokens = 30;
diff --git a/examples/models/phi-3-mini/main.cpp b/examples/models/phi-3-mini/main.cpp
@@ -83,7 +83,7 @@ int main() {
 
   SentencePieceTokenizer tokenizer("tokenizer.model");
 
-  Module model("phi-3-mini.pte", Module::MlockConfig::UseMlockIgnoreErrors);
+  Module model("phi-3-mini.pte", Module::LoadMode::MmapUseMlockIgnoreErrors);
 
   const auto max_output_tokens = 128;
   generate(model, prompt, tokenizer, max_output_tokens);
diff --git a/examples/qualcomm/llama2/runner/runner.cpp b/examples/qualcomm/llama2/runner/runner.cpp
@@ -38,7 +38,7 @@ Runner::Runner(
     const float temperature)
     : module_(std::make_unique<Module>(
           model_path,
-          Module::MlockConfig::UseMlockIgnoreErrors)),
+          Module::LoadMode::MmapUseMlockIgnoreErrors)),
       tokenizer_path_(tokenizer_path),
       model_path_(model_path),
       temperature_(temperature) {
@@ -649,7 +649,7 @@ Error Runner::mem_alloc(size_t alignment, size_t seq_len) {
   // Reset and re-init again to trigger registered function
   module_.reset();
   module_ = std::make_unique<Module>(
-      model_path_, Module::MlockConfig::UseMlockIgnoreErrors),
+      model_path_, Module::LoadMode::MmapUseMlockIgnoreErrors);
   ET_CHECK_MSG(load() == Error::Ok, "Runner failed to load method");
 
   return Error::Ok;
diff --git a/extension/android/jni/jni_layer.cpp b/extension/android/jni/jni_layer.cpp
@@ -233,7 +233,7 @@ class JEValue : public facebook::jni::JavaClass<JEValue> {
 class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
  private:
   friend HybridBase;
-  std::unique_ptr<torch::executor::Module> module_;
+  std::unique_ptr<Module> module_;
 
  public:
   constexpr static auto kJavaDescriptor = "Lorg/pytorch/executorch/NativePeer;";
@@ -252,9 +252,8 @@ class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
       facebook::jni::alias_ref<
           facebook::jni::JMap<facebook::jni::JString, facebook::jni::JString>>
           extraFiles) {
-    module_ = std::make_unique<torch::executor::Module>(
-        modelPath->toStdString(),
-        torch::executor::Module::MlockConfig::NoMlock);
+    module_ = std::make_unique<Module>(
+        modelPath->toStdString(), Module::LoadMode::Mmap);
   }
 
   facebook::jni::local_ref<facebook::jni::JArrayClass<JEValue>> forward(
diff --git a/extension/module/module.cpp b/extension/module/module.cpp
@@ -8,6 +8,7 @@
 
 #include <executorch/extension/module/module.h>
 
+#include <executorch/extension/data_loader/file_data_loader.h>
 #include <executorch/extension/data_loader/mmap_data_loader.h>
 #include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
 #include <executorch/runtime/platform/runtime.h>
@@ -36,10 +37,10 @@ namespace torch::executor {
 
 Module::Module(
     const std::string& file_path,
-    const Module::MlockConfig mlock_config,
+    const Module::LoadMode load_mode,
     std::unique_ptr<EventTracer> event_tracer)
     : file_path_(file_path),
-      mlock_config_(mlock_config),
+      load_mode_(load_mode),
       memory_allocator_(std::make_unique<util::MallocMemoryAllocator>()),
       temp_allocator_(std::make_unique<util::MallocMemoryAllocator>()),
       event_tracer_(std::move(event_tracer)) {
@@ -49,36 +50,41 @@ Module::Module(
 Module::Module(
     std::unique_ptr<DataLoader> data_loader,
     std::unique_ptr<MemoryAllocator> memory_allocator,
-    std::unique_ptr<MemoryAllocator> tmp_memory_allocator,
+    std::unique_ptr<MemoryAllocator> temp_allocator,
     std::unique_ptr<EventTracer> event_tracer)
     : data_loader_(std::move(data_loader)),
       memory_allocator_(
           memory_allocator ? std::move(memory_allocator)
                            : std::make_unique<util::MallocMemoryAllocator>()),
       temp_allocator_(
-
-          tmp_memory_allocator
-              ? std::move(tmp_memory_allocator)
-              : std::make_unique<util::MallocMemoryAllocator>()),
+          temp_allocator ? std::move(temp_allocator)
+                         : std::make_unique<util::MallocMemoryAllocator>()),
       event_tracer_(std::move(event_tracer)) {
   runtime_init();
 }
 
 Error Module::load(const Program::Verification verification) {
   if (!is_loaded()) {
     if (!data_loader_) {
-      data_loader_ = ET_UNWRAP_UNIQUE(
-          util::MmapDataLoader::from(file_path_.c_str(), [this] {
-            switch (mlock_config_) {
-              case MlockConfig::NoMlock:
-                return util::MmapDataLoader::MlockConfig::NoMlock;
-              case MlockConfig::UseMlock:
-                return util::MmapDataLoader::MlockConfig::UseMlock;
-              case MlockConfig::UseMlockIgnoreErrors:
-                return util::MmapDataLoader::MlockConfig::UseMlockIgnoreErrors;
-            }
-            ET_ASSERT_UNREACHABLE();
-          }()));
+      switch (load_mode_) {
+        case LoadMode::File: // Load via FileDataLoader (no mmap).
+          data_loader_ =
+              ET_UNWRAP_UNIQUE(util::FileDataLoader::from(file_path_.c_str()));
+          break;
+        case LoadMode::Mmap: // mmap only: explicitly opt out of mlock.
+          data_loader_ = ET_UNWRAP_UNIQUE(util::MmapDataLoader::from(
+              file_path_.c_str(), util::MmapDataLoader::MlockConfig::NoMlock));
+          break;
+        case LoadMode::MmapUseMlock: // mmap + mlock, mlock errors not ignored.
+          data_loader_ = ET_UNWRAP_UNIQUE(util::MmapDataLoader::from(
+              file_path_.c_str(), util::MmapDataLoader::MlockConfig::UseMlock));
+          break;
+        case LoadMode::MmapUseMlockIgnoreErrors: // mmap + best-effort mlock.
+          data_loader_ = ET_UNWRAP_UNIQUE(util::MmapDataLoader::from(
+              file_path_.c_str(),
+              util::MmapDataLoader::MlockConfig::UseMlockIgnoreErrors));
+          break;
+      }
     };
     program_ =
         ET_UNWRAP_UNIQUE(Program::load(data_loader_.get(), verification));
diff --git a/extension/module/module.h b/extension/module/module.h
@@ -24,42 +24,44 @@ namespace torch::executor {
 class Module final {
  public:
   /**
-   * Enum to define memory locking behavior.
+   * Enum to define loading behavior.
    */
-  enum class MlockConfig {
-    /// Do not use memory locking.
-    NoMlock,
+  enum class LoadMode {
+    /// Load the whole file as a buffer.
+    File,
+    /// Use mmap to load pages into memory.
+    Mmap,
     /// Use memory locking and handle errors.
-    UseMlock,
+    MmapUseMlock,
     /// Use memory locking and ignore errors.
-    UseMlockIgnoreErrors,
+    MmapUseMlockIgnoreErrors,
   };
 
   /**
    * Constructs an instance by loading a program from a file with specified
    * memory locking behavior.
    *
    * @param[in] file_path The path to the ExecuTorch program file to load.
-   * @param[in] mlock_config The memory locking configuration to use.
+   * @param[in] load_mode The loading mode to use.
    */
   explicit Module(
       const std::string& file_path,
-      const MlockConfig mlock_config = MlockConfig::UseMlock,
+      const LoadMode load_mode = LoadMode::MmapUseMlock,
       std::unique_ptr<EventTracer> event_tracer = nullptr);
 
   /**
    * Constructs an instance with the provided data loader and memory allocator.
    *
    * @param[in] data_loader A DataLoader used for loading program data.
    * @param[in] memory_allocator A MemoryAllocator used for memory management.
-   * @param[in] tmp_memory_allocator A MemoryAllocator used for allocating
-   * memory during execution time.
+   * @param[in] temp_allocator A MemoryAllocator to use when allocating
+   * temporary data during kernel or delegate execution.
    * @param[in] event_tracer A EventTracer used for tracking and logging events.
    */
   explicit Module(
       std::unique_ptr<DataLoader> data_loader,
       std::unique_ptr<MemoryAllocator> memory_allocator = nullptr,
-      std::unique_ptr<MemoryAllocator> tmp_memory_allocator = nullptr,
+      std::unique_ptr<MemoryAllocator> temp_allocator = nullptr,
       std::unique_ptr<EventTracer> event_tracer = nullptr);
   Module(const Module&) = delete;
   Module& operator=(const Module&) = delete;
@@ -215,7 +217,7 @@ class Module final {
 
  private:
   std::string file_path_;
-  MlockConfig mlock_config_{MlockConfig::NoMlock};
+  LoadMode load_mode_{LoadMode::MmapUseMlock};
   std::unique_ptr<DataLoader> data_loader_;
   std::unique_ptr<MemoryAllocator> memory_allocator_;
   std::unique_ptr<MemoryAllocator> temp_allocator_;
diff --git a/extension/module/targets.bzl b/extension/module/targets.bzl
@@ -23,6 +23,7 @@ def define_common_targets():
             ],
             deps = [
                 "//executorch/extension/memory_allocator:malloc_memory_allocator",
+                "//executorch/extension/data_loader:file_data_loader",
                 "//executorch/extension/data_loader:mmap_data_loader",
             ],
             exported_deps = [