Skip to content

Commit cdbcab2

Browse files
kirklandsign and cmodi-meta
authored and committed
Use common LLM interface
1 parent 314c8dd commit cdbcab2

File tree

4 files changed

+21
-17
lines changed

4 files changed

+21
-17
lines changed

examples/mediatek/executor_runner/mtk_llama_runner.cpp

Lines changed: 3 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -120,7 +120,9 @@ Error MTKLlamaRunner::generate(
120120
const std::string& prompt,
121121
int32_t seq_len,
122122
std::function<void(const std::string&)> token_callback,
123-
std::function<void(const Stats&)> stats_callback) {
123+
std::function<void(const Stats&)> stats_callback,
124+
bool,
125+
bool) {
124126
if (!is_loaded()) {
125127
ET_CHECK_OK_OR_RETURN_ERROR(load());
126128
}

examples/mediatek/executor_runner/mtk_llama_runner.h

Lines changed: 6 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -12,6 +12,7 @@
1212
#pragma once
1313

1414
#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
15+
#include <executorch/extension/llm/runner/runner_interface.h>
1516
#include <executorch/extension/llm/runner/stats.h>
1617
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
1718
#include <executorch/extension/llm/tokenizer/tiktoken.h>
@@ -31,7 +32,8 @@ using executorch::extension::llm::Tokenizer;
3132
using executorch::runtime::Error;
3233
using executorch::runtime::Result;
3334

34-
class MTKLlamaRunner {
35+
class MTKLlamaRunner
36+
: public executorch::extension::llm::RunnerInterface {
3537
public:
3638
explicit MTKLlamaRunner(
3739
const std::string& model_path,
@@ -44,7 +46,9 @@ class MTKLlamaRunner {
4446
const std::string& prompt,
4547
int32_t seq_len = 128,
4648
std::function<void(const std::string&)> token_callback = {},
47-
std::function<void(const Stats&)> stats_callback = {});
49+
std::function<void(const Stats&)> stats_callback = {},
50+
bool echo = true,
51+
bool warming = false);
4852
void stop();
4953

5054
LlamaModelOptions get_model_options();

extension/android/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -179,6 +179,7 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
179179
ADD_LIBRARY(libneuron_buffer_allocator SHARED IMPORTED)
180180
SET_PROPERTY(TARGET libneuron_buffer_allocator PROPERTY IMPORTED_LOCATION ${NEURON_BUFFER_ALLOCATOR_LIB})
181181
list(APPEND link_libraries neuron_backend libneuron_buffer_allocator)
182+
target_compile_definitions(executorch_jni PRIVATE EXECUTORCH_BUILD_MEDIATEK=1)
182183
endif()
183184
endif()
184185

extension/android/jni/jni_layer_llama.cpp

Lines changed: 11 additions & 14 deletions
Original file line number · Diff line number · Diff line change
@@ -13,10 +13,10 @@
1313
#include <unordered_map>
1414
#include <vector>
1515

16-
#include <executorch/examples/mediatek/executor_runner/mtk_llama_runner.h>
1716
#include <executorch/examples/models/llama/runner/runner.h>
1817
#include <executorch/examples/models/llava/runner/llava_runner.h>
1918
#include <executorch/extension/llm/runner/image.h>
19+
#include <executorch/extension/llm/runner/runner_interface.h>
2020
#include <executorch/runtime/platform/log.h>
2121
#include <executorch/runtime/platform/platform.h>
2222
#include <executorch/runtime/platform/runtime.h>
@@ -29,6 +29,10 @@
2929
#include <fbjni/ByteBuffer.h>
3030
#include <fbjni/fbjni.h>
3131

32+
#if defined(EXECUTORCH_BUILD_MEDIATEK)
33+
#include <executorch/examples/mediatek/executor_runner/mtk_llama_runner.h>
34+
#endif
35+
3236
namespace llm = ::executorch::extension::llm;
3337
using ::executorch::runtime::Error;
3438

@@ -112,9 +116,8 @@ class ExecuTorchLlamaJni
112116
private:
113117
friend HybridBase;
114118
int model_type_category_;
115-
std::unique_ptr<example::Runner> runner_;
119+
std::unique_ptr<llm::RunnerInterface> runner_;
116120
std::unique_ptr<llm::MultimodalRunner> multi_modal_runner_;
117-
std::unique_ptr<MTKLlamaRunner> mtk_llama_runner_;
118121

119122
public:
120123
constexpr static auto kJavaDescriptor =
@@ -161,11 +164,15 @@ class ExecuTorchLlamaJni
161164
model_path->toStdString().c_str(),
162165
tokenizer_path->toStdString().c_str(),
163166
temperature);
167+
#if defined(EXECUTORCH_BUILD_MEDIATEK)
164168
} else if (model_type_category == MODEL_TYPE_MEDIATEK_LLAMA) {
165-
mtk_llama_runner_ = std::make_unique<MTKLlamaRunner>(
169+
runner_ = std::make_unique<MTKLlamaRunner>(
166170
model_path->toStdString().c_str(),
167171
tokenizer_path->toStdString().c_str(),
168172
temperature);
173+
// Interpret the model type as LLM
174+
model_type_category_ = MODEL_TYPE_CATEGORY_LLM;
175+
#endif
169176
}
170177
}
171178

@@ -205,12 +212,6 @@ class ExecuTorchLlamaJni
205212
[callback](std::string result) { callback->onResult(result); },
206213
[callback](const llm::Stats& result) { callback->onStats(result); },
207214
echo);
208-
} else if (model_type_category_ == MODEL_TYPE_MEDIATEK_LLAMA) {
209-
mtk_llama_runner_->generate(
210-
prompt->toStdString(),
211-
seq_len,
212-
[callback](std::string result) { callback->onResult(result); },
213-
[callback](const Stats& result) { callback->onStats(result); });
214215
}
215216
return 0;
216217
}
@@ -300,8 +301,6 @@ class ExecuTorchLlamaJni
300301
multi_modal_runner_->stop();
301302
} else if (model_type_category_ == MODEL_TYPE_CATEGORY_LLM) {
302303
runner_->stop();
303-
} else if (model_type_category_ == MODEL_TYPE_MEDIATEK_LLAMA) {
304-
mtk_llama_runner_->stop();
305304
}
306305
}
307306

@@ -310,8 +309,6 @@ class ExecuTorchLlamaJni
310309
return static_cast<jint>(multi_modal_runner_->load());
311310
} else if (model_type_category_ == MODEL_TYPE_CATEGORY_LLM) {
312311
return static_cast<jint>(runner_->load());
313-
} else if (model_type_category_ == MODEL_TYPE_MEDIATEK_LLAMA) {
314-
return static_cast<jint>(mtk_llama_runner_->load());
315312
}
316313
return static_cast<jint>(Error::InvalidArgument);
317314
}

0 commit comments

Comments (0)