
Commit 4e310cb

namespace changes to runner and jni layer
1 parent a218821

4 files changed, +39 −19 lines

examples/mediatek/executor_runner/llama_runner/llm_helper/include/llama_runner_values.h

Lines changed: 13 additions & 3 deletions
@@ -1,7 +1,17 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Contains values that are used by the mtk_llama_runner.cpp
+
 #pragma once
 
-namespace torch::executor {
-using llm_helper::LLMType;
+namespace mtk::vars {
+using example::llm_helper::LLMType;
 
 // Sizes
 const size_t PROMPT_TOKEN_BATCH_SIZE = 128;
@@ -29,4 +39,4 @@ namespace torch::executor {
 // Comma-Separated Paths
 const std::string GEN_MODEL_PATHS="/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_3.pte,";
 
-} // namespace torch::executor
+} // namespace mtk::vars
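For orientation, this is what the rename means for call sites: values that used to be reached through torch::executor now live under mtk::vars. A minimal, self-contained sketch; the condensed namespace body below is illustrative, not the full header:

#include <cstddef>
#include <string>

// Condensed stand-in for llama_runner_values.h after this commit:
// the tunables now live under mtk::vars instead of torch::executor.
namespace mtk::vars {
const size_t PROMPT_TOKEN_BATCH_SIZE = 128;
const std::string GEN_MODEL_PATHS = "/data/local/tmp/et-mtk/llama3/...";  // truncated here
} // namespace mtk::vars

int main() {
  // Call sites can qualify explicitly...
  const size_t batch = mtk::vars::PROMPT_TOKEN_BATCH_SIZE;

  // ...or import the namespace once, as mtk_llama_runner.cpp now does:
  using namespace mtk::vars;
  return (batch == 128 && !GEN_MODEL_PATHS.empty()) ? 0 : 1;
}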

examples/mediatek/executor_runner/mtk_llama_runner.cpp

Lines changed: 16 additions & 12 deletions
@@ -73,17 +73,22 @@ static uint64_t MAX_RESPONSE = 50; // Maximum number of tokens to generate.
 static constexpr int8_t kAddBos = 1;
 static constexpr int8_t kAddEos = 0;
 
-using namespace torch::executor;
-using namespace torch::executor::llm_helper;
-using torch::executor::utils::Timer;
+using namespace example::llm_helper;
+using example::utils::argmax;
+using example::utils::split;
+using example::utils::Timer;
+using example::utils::to_string;
+using namespace mtk::vars;
+
+namespace llm = ::executorch::extension::llm;
 
 MTKLlamaRunner::MTKLlamaRunner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     const float temperature)
     : modeloptions_(get_model_options()),
       modelpaths_(get_model_paths()) {
-  runtime_init();
+  executorch::runtime::runtime_init();
   ET_LOG(
       Info,
       "Creating MTK Llama runner. Current it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
@@ -125,7 +130,7 @@ Error MTKLlamaRunner::generate(
   // Wrap the token_callback with print function
   std::function<void(const std::string&)> wrapped_callback =
       [token_callback](const std::string& piece) {
-        util::safe_printf(piece.c_str());
+        llm::safe_printf(piece.c_str());
         fflush(stdout);
         if (token_callback) {
           token_callback(piece);
@@ -172,8 +177,8 @@ LlamaModelPaths MTKLlamaRunner::get_model_paths() {
   LlamaModelPaths model_paths = {
       .tokenizer_path = TOKENIZER_PATH,
       .token_embedding_path = TOKEN_EMBEDDING_PATH,
-      .prompt_model_paths = utils::split(PROMPT_MODEL_PATHS, ','),
-      .gen_model_paths = utils::split(GEN_MODEL_PATHS, ',')};
+      .prompt_model_paths = split(PROMPT_MODEL_PATHS, ','),
+      .gen_model_paths = split(GEN_MODEL_PATHS, ',')};
   ET_LOG(Info, "Completed get_model_paths");
   return model_paths;
 }
@@ -225,8 +230,7 @@ Result<uint64_t> MTKLlamaRunner::digest_prompt(
 
   const auto vocab_size = tokenizer->vocab_size();
   const auto logits_type = llama_runtime.GetModelOptions().model_output_type;
-  const auto first_output_token =
-      utils::argmax(logits_type, logits, vocab_size);
+  const auto first_output_token = argmax(logits_type, logits, vocab_size);
   return first_output_token;
 }
 
@@ -273,7 +277,7 @@ Error MTKLlamaRunner::gen_response(
   timer_gen_token.End();
 
   prev_token = output_token;
-  output_token = utils::argmax(logits_type, logits, vocab_size);
+  output_token = argmax(logits_type, logits, vocab_size);
   full_response_tokens.push_back(output_token);
 
   // Stop when output is EOS
@@ -293,7 +297,7 @@ Error MTKLlamaRunner::gen_response(
   }
 
   std::cout << "\n\n[Generated Tokens]\n"
-            << utils::to_string(full_response_tokens) << std::endl;
+            << to_string(full_response_tokens) << std::endl;
 
   ET_LOG(
       Info,
@@ -327,7 +331,7 @@ Error MTKLlamaRunner::inference(
 std::unique_ptr<Tokenizer> MTKLlamaRunner::load_tokenizer() {
   std::unique_ptr<Tokenizer> tokenizer;
   // Assumes that tokenizer type is Tiktoken
-  tokenizer = torch::executor::get_tiktoken_for_llama();
+  tokenizer = example::get_tiktoken_for_llama();
   tokenizer->load(modelpaths_.tokenizer_path);
   return tokenizer;
 }
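The shape of this file's change is worth isolating: one blanket using namespace torch::executor; is replaced by targeted using-declarations plus a single namespace alias. A self-contained sketch of the alias pattern, with hypothetical names standing in for the real ExecuTorch namespaces:

#include <cstdio>

// Hypothetical stand-in for a deeply nested library namespace.
namespace mylib::extension::llm {
void safe_printf(const char* s) {
  std::fputs(s, stdout);
}
} // namespace mylib::extension::llm

// Alias once at file scope; call sites stay short but still name
// their origin, unlike a blanket using-namespace directive.
namespace llm = ::mylib::extension::llm;

int main() {
  llm::safe_printf("token piece\n");  // mirrors llm::safe_printf(...) in the diff
  return 0;
}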

examples/mediatek/executor_runner/mtk_llama_runner.h

Lines changed: 9 additions & 3 deletions
@@ -22,9 +22,15 @@
 
 #include "llama_runner/LlamaConfig.h"
 #include "llama_runner/LlamaRuntime.h"
-using namespace torch::executor;
 using Stats = ::executorch::llm::Stats;
 
+using example::LlamaModelOptions;
+using example::LlamaModelPaths;
+using example::LlamaRuntime;
+using executorch::extension::llm::Tokenizer;
+using executorch::runtime::Error;
+using executorch::runtime::Result;
+
 class MTKLlamaRunner {
  public:
   explicit MTKLlamaRunner(
@@ -62,8 +68,8 @@ class MTKLlamaRunner {
 
  private:
   // model
-  const torch::executor::LlamaModelOptions modeloptions_;
-  const torch::executor::LlamaModelPaths modelpaths_;
+  const LlamaModelOptions modeloptions_;
+  const LlamaModelPaths modelpaths_;
   std::unique_ptr<Tokenizer> tokenizer_;
   std::unique_ptr<LlamaRuntime> runtime_;
 };
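The header change follows the same rationale: a using-namespace at header scope injects every name into every translation unit that includes the header, while the six targeted using-declarations above expose only what the class interface needs. A toy, compilable illustration with hypothetical names:

namespace toylib {
struct Options { int batch; };
struct Paths { const char* tokenizer; };
}  // namespace toylib

// Targeted using-declarations expose exactly two names; a
// `using namespace toylib;` here would leak everything in toylib
// into every file that includes this header.
using toylib::Options;
using toylib::Paths;

class ToyRunner {
 public:
  explicit ToyRunner(const Options& o, const Paths& p) : o_(o), p_(p) {}
  int batch() const { return o_.batch; }

 private:
  const Options o_;
  const Paths p_;
};

int main() {
  ToyRunner r(Options{4}, Paths{"tokenizer.bin"});
  return r.batch() == 4 ? 0 : 1;
}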

extension/android/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -173,7 +173,7 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
     ${EXECUTORCH_ROOT}/examples/mediatek/executor_runner/llama_runner
   )
   ADD_LIBRARY(libneuron_buffer_allocator SHARED IMPORTED)
-  SET_PROPERTY(TARGET libneuron_buffer_allocator PROPERTY IMPORTED_LOCATION ${NEURON_BUFFER_ALLOCATOR_LIB}/libneuron_buffer_allocator.so)
+  SET_PROPERTY(TARGET libneuron_buffer_allocator PROPERTY IMPORTED_LOCATION ${NEURON_BUFFER_ALLOCATOR_LIB})
   list(APPEND link_libraries neuron_backend libneuron_buffer_allocator)
 endif()
 
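Note the semantic shift in this one-liner: IMPORTED_LOCATION now receives ${NEURON_BUFFER_ALLOCATOR_LIB} verbatim, so the variable is expected to hold the full path to the shared library itself rather than its containing directory. Presumably the build is now configured with something like -DNEURON_BUFFER_ALLOCATOR_LIB=/path/to/libneuron_buffer_allocator.so; the exact invocation is an assumption, as it is not shown in this commit.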
