 * any receiver's applicable license agreements with MediaTek Inc.
 */

-#include "executorch/backends/mediatek/runtime/include/NeuronBufferAllocator.h"
#include <executorch/examples/mediatek/executor_runner/mtk_llama_runner.h>
+#include "executorch/backends/mediatek/runtime/include/NeuronBufferAllocator.h"

#include <ctime>
#include <iostream>

#include "llama_runner/ModelChunk.h"
#include "llama_runner/Utils.h"
-#include "llama_runner/llm_helper/include/llm_types.h"
#include "llama_runner/llm_helper/include/llama_runner_values.h"
+#include "llama_runner/llm_helper/include/llm_types.h"

static uint64_t MAX_RESPONSE = 50; // Maximum number of tokens to generate.
// Global BOS and EOS option for tokenization (encoding)
@@ -83,15 +83,14 @@ using namespace mtk::vars;
namespace llm = ::executorch::extension::llm;

MTKLlamaRunner::MTKLlamaRunner(
-    const std::string& model_path,
-    const std::string& tokenizer_path,
-    const float temperature)
-    : modeloptions_(get_model_options()),
-      modelpaths_(get_model_paths()) {
+    const std::string& model_path,
+    const std::string& tokenizer_path,
+    const float temperature)
+    : modeloptions_(get_model_options()), modelpaths_(get_model_paths()) {
  executorch::runtime::runtime_init();
  ET_LOG(
-      Info,
-      "Creating MTK Llama runner. Current it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
+      Info,
+      "Creating MTK Llama runner. Current it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
}

Error MTKLlamaRunner::load() {
@@ -122,7 +121,6 @@ Error MTKLlamaRunner::generate(
    int32_t seq_len,
    std::function<void(const std::string&)> token_callback,
    std::function<void(const Stats&)> stats_callback) {
-
  if (!is_loaded()) {
    ET_CHECK_OK_OR_RETURN_ERROR(load());
  }
@@ -137,9 +135,9 @@ Error MTKLlamaRunner::generate(
    }
  };

-  ET_LOG(Info, "Starting inference from MTKLlamaRunner");
+  ET_LOG(Info, "Starting inference from MTKLlamaRunner");
  inference(*runtime_.get(), tokenizer_, prompt, wrapped_callback);
-  ET_LOG(Info, "Completed inference from MTKLlamaRunner");
+  ET_LOG(Info, "Completed inference from MTKLlamaRunner");

  return Error::Ok;
}
@@ -169,7 +167,7 @@ LlamaModelOptions MTKLlamaRunner::get_model_options() {
      .cache_type = CACHE_TYPE,
      .mask_type = MASK_TYPE,
      .rot_emb_type = ROT_EMB_TYPE};
-  ET_LOG(Info, "Completed get_model_options");
+  ET_LOG(Info, "Completed get_model_options");
  return options;
}

@@ -179,7 +177,7 @@ LlamaModelPaths MTKLlamaRunner::get_model_paths() {
      .token_embedding_path = TOKEN_EMBEDDING_PATH,
      .prompt_model_paths = split(PROMPT_MODEL_PATHS, ','),
      .gen_model_paths = split(GEN_MODEL_PATHS, ',')};
-  ET_LOG(Info, "Completed get_model_paths");
+  ET_LOG(Info, "Completed get_model_paths");
  return model_paths;
}

@@ -325,7 +323,8 @@ Error MTKLlamaRunner::inference(
  const auto first_output_token = prefill_res.get();

  // run generation mode (decoding)
-  return gen_response(llama_runtime, tokenizer, first_output_token, token_callback);
+  return gen_response(
+      llama_runtime, tokenizer, first_output_token, token_callback);
}

std::unique_ptr<Tokenizer> MTKLlamaRunner::load_tokenizer() {
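For context, a minimal sketch of how a caller might drive the runner touched by this diff. The constructor and generate() signatures follow the lines shown above; the file paths, prompt, seq_len, and temperature values are illustrative assumptions only, not part of this commit.

// Illustrative caller sketch; paths and values below are hypothetical.
#include <executorch/examples/mediatek/executor_runner/mtk_llama_runner.h>

#include <iostream>
#include <string>

int main() {
  // Constructor signature as in the diff: (model_path, tokenizer_path, temperature).
  MTKLlamaRunner runner(
      "/data/local/tmp/llama.pte", // hypothetical model path
      "/data/local/tmp/tokenizer.bin", // hypothetical tokenizer path
      /*temperature=*/0.8f);

  // generate() loads the model on first use via the is_loaded()/load() check
  // visible in the diff, then streams decoded tokens through the callback.
  const auto err = runner.generate(
      "What is the capital of France?",
      /*seq_len=*/128,
      [](const std::string& piece) { std::cout << piece << std::flush; },
      /*stats_callback=*/nullptr); // no stats reporting in this sketch
  return err == executorch::runtime::Error::Ok ? 0 : 1;
}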