@@ -73,17 +73,22 @@ static uint64_t MAX_RESPONSE = 50; // Maximum number of tokens to generate.
 static constexpr int8_t kAddBos = 1;
 static constexpr int8_t kAddEos = 0;
 
-using namespace torch::executor;
-using namespace torch::executor::llm_helper;
-using torch::executor::utils::Timer;
+using namespace example::llm_helper;
+using example::utils::argmax;
+using example::utils::split;
+using example::utils::Timer;
+using example::utils::to_string;
+using namespace mtk::vars;
+
+namespace llm = ::executorch::extension::llm;
 
 MTKLlamaRunner::MTKLlamaRunner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     const float temperature)
     : modeloptions_(get_model_options()),
       modelpaths_(get_model_paths()) {
-  runtime_init();
+  executorch::runtime::runtime_init();
   ET_LOG(
       Info,
       "Creating MTK Llama runner. Current it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
@@ -125,7 +130,7 @@ Error MTKLlamaRunner::generate(
   // Wrap the token_callback with print function
   std::function<void(const std::string&)> wrapped_callback =
       [token_callback](const std::string& piece) {
-        util::safe_printf(piece.c_str());
+        llm::safe_printf(piece.c_str());
         fflush(stdout);
         if (token_callback) {
           token_callback(piece);
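
The only change in this hunk is where safe_printf comes from (the new llm alias instead of the removed util namespace); the surrounding context shows the wrapping pattern itself: a lambda that echoes each decoded piece, flushes stdout, then forwards to the user's callback. A standard-library-only sketch of that pattern, with illustrative names rather than the runner's API:

#include <cstdio>
#include <functional>
#include <string>

std::function<void(const std::string&)> wrap_with_print(
    std::function<void(const std::string&)> user_callback) {
  return [user_callback](const std::string& piece) {
    std::fputs(piece.c_str(), stdout);  // echo the decoded piece
    std::fflush(stdout);                // keep output streaming promptly
    if (user_callback) {
      user_callback(piece);             // then forward to the caller
    }
  };
}

int main() {
  auto cb = wrap_with_print([](const std::string& piece) {
    (void)piece;  // e.g. append to a transcript buffer
  });
  cb("token ");
  cb("stream\n");
  return 0;
}
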
@@ -172,8 +177,8 @@ LlamaModelPaths MTKLlamaRunner::get_model_paths() {
   LlamaModelPaths model_paths = {
       .tokenizer_path = TOKENIZER_PATH,
       .token_embedding_path = TOKEN_EMBEDDING_PATH,
-      .prompt_model_paths = utils::split(PROMPT_MODEL_PATHS, ','),
-      .gen_model_paths = utils::split(GEN_MODEL_PATHS, ',')};
+      .prompt_model_paths = split(PROMPT_MODEL_PATHS, ','),
+      .gen_model_paths = split(GEN_MODEL_PATHS, ',')};
   ET_LOG(Info, "Completed get_model_paths");
   return model_paths;
 }
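
PROMPT_MODEL_PATHS and GEN_MODEL_PATHS are comma-separated path lists that split(...) turns into vectors; the diff only drops the utils:: qualifier now that the function is pulled in with a using-declaration. A rough approximation of such a splitter (the real example::utils::split may differ, e.g. in how it treats empty tokens):

#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> split_csv(const std::string& input, char delim) {
  std::vector<std::string> parts;
  std::string token;
  std::istringstream stream(input);
  while (std::getline(stream, token, delim)) {
    if (!token.empty()) {
      parts.push_back(token);
    }
  }
  return parts;
}

// Usage: split_csv("chunk0.pte,chunk1.pte", ',') -> {"chunk0.pte", "chunk1.pte"}
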
@@ -225,8 +230,7 @@ Result<uint64_t> MTKLlamaRunner::digest_prompt(
 
   const auto vocab_size = tokenizer->vocab_size();
   const auto logits_type = llama_runtime.GetModelOptions().model_output_type;
-  const auto first_output_token =
-      utils::argmax(logits_type, logits, vocab_size);
+  const auto first_output_token = argmax(logits_type, logits, vocab_size);
   return first_output_token;
 }
 
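
Here and in the generation loop below, argmax(logits_type, logits, vocab_size) performs greedy decoding: it scans the logits buffer and returns the id of the highest-scoring token, dispatching on the model's output dtype. A simplified, float-only version of that idea (the real helper also handles other output types):

#include <cstddef>
#include <cstdint>

uint64_t argmax_f32(const float* logits, size_t vocab_size) {
  uint64_t best = 0;
  for (size_t i = 1; i < vocab_size; ++i) {
    if (logits[i] > logits[best]) {
      best = i;
    }
  }
  return best;  // greedy pick: token id with the highest logit
}
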
@@ -273,7 +277,7 @@ Error MTKLlamaRunner::gen_response(
     timer_gen_token.End();
 
     prev_token = output_token;
-    output_token = utils::argmax(logits_type, logits, vocab_size);
+    output_token = argmax(logits_type, logits, vocab_size);
     full_response_tokens.push_back(output_token);
 
     // Stop when output is EOS
@@ -293,7 +297,7 @@ Error MTKLlamaRunner::gen_response(
   }
 
   std::cout << "\n\n[Generated Tokens]\n"
-            << utils::to_string(full_response_tokens) << std::endl;
+            << to_string(full_response_tokens) << std::endl;
 
   ET_LOG(
       Info,
@@ -327,7 +331,7 @@ Error MTKLlamaRunner::inference(
 std::unique_ptr<Tokenizer> MTKLlamaRunner::load_tokenizer() {
   std::unique_ptr<Tokenizer> tokenizer;
   // Assumes that tokenizer type is Tiktoken
-  tokenizer = torch::executor::get_tiktoken_for_llama();
+  tokenizer = example::get_tiktoken_for_llama();
   tokenizer->load(modelpaths_.tokenizer_path);
   return tokenizer;
 }