Commit 78df8b5

lintrunner formatting

1 parent: 6aeadfe
File tree: 4 files changed, +54 -52 lines
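Judging by the commit message, these are mechanical fixes produced by the repository's lintrunner setup rather than hand edits. For reference, the usual workflow (assuming the standard .lintrunner.toml configuration at the repository root) looks like:

    pip install lintrunner lintrunner-adapters
    lintrunner init    # install the linters declared in .lintrunner.toml
    lintrunner -a      # lint the changed files and apply the suggested patches

The diffs below are whitespace, include-order, and line-wrapping changes of the kind clang-format-based linters emit.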

examples/mediatek/executor_runner/llama_runner/llm_helper/include/llama_runner_values.h

Lines changed: 33 additions & 29 deletions
@@ -11,32 +11,36 @@
 #pragma once
 
 namespace mtk::vars {
-using example::llm_helper::LLMType;
-
-// Sizes
-const size_t PROMPT_TOKEN_BATCH_SIZE = 128;
-const size_t CACHE_SIZE = 512;
-const size_t HIDDEN_SIZE = 4096;
-const size_t NUM_HEAD = 32;
-const size_t NUM_LAYER = 32;
-const size_t MAX_TOKEN_LENGTH = 8192;
-const double ROT_EMB_BASE = 500000;
-
-// Types
-const LLMType MODEL_INPUT_TYPE = LLMType::FP32;
-const LLMType MODEL_OUTPUT_TYPE = LLMType::FP32;
-const LLMType CACHE_TYPE = LLMType::FP32;
-const LLMType MASK_TYPE = LLMType::FP32;
-const LLMType ROT_EMB_TYPE = LLMType::FP32;
-
-// Paths
-const std::string TOKENIZER_PATH="/data/local/tmp/et-mtk/llama3/tokenizer.model";
-const std::string TOKEN_EMBEDDING_PATH="/data/local/tmp/et-mtk/llama3/embedding_llama3-8B-instruct_fp32.bin";
-
-// Comma-Separated Paths
-const std::string PROMPT_MODEL_PATHS="/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_3.pte,";
-
-// Comma-Separated Paths
-const std::string GEN_MODEL_PATHS="/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_3.pte,";
-
-} // namespace mtk:vars
+using example::llm_helper::LLMType;
+
+// Sizes
+const size_t PROMPT_TOKEN_BATCH_SIZE = 128;
+const size_t CACHE_SIZE = 512;
+const size_t HIDDEN_SIZE = 4096;
+const size_t NUM_HEAD = 32;
+const size_t NUM_LAYER = 32;
+const size_t MAX_TOKEN_LENGTH = 8192;
+const double ROT_EMB_BASE = 500000;
+
+// Types
+const LLMType MODEL_INPUT_TYPE = LLMType::FP32;
+const LLMType MODEL_OUTPUT_TYPE = LLMType::FP32;
+const LLMType CACHE_TYPE = LLMType::FP32;
+const LLMType MASK_TYPE = LLMType::FP32;
+const LLMType ROT_EMB_TYPE = LLMType::FP32;
+
+// Paths
+const std::string TOKENIZER_PATH =
+    "/data/local/tmp/et-mtk/llama3/tokenizer.model";
+const std::string TOKEN_EMBEDDING_PATH =
+    "/data/local/tmp/et-mtk/llama3/embedding_llama3-8B-instruct_fp32.bin";
+
+// Comma-Separated Paths
+const std::string PROMPT_MODEL_PATHS =
+    "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_3.pte,";
+
+// Comma-Separated Paths
+const std::string GEN_MODEL_PATHS =
+    "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_3.pte,";
+
+} // namespace mtk::vars
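Note that the chunk file names appear to encode the runner's batching configuration: 128t512c matches PROMPT_TOKEN_BATCH_SIZE = 128 with CACHE_SIZE = 512 for prefill, while 1t512c suggests single-token steps for generation. This is an inference from the naming, not something the diff itself confirms.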

examples/mediatek/executor_runner/mtk_llama_runner.cpp

Lines changed: 14 additions & 15 deletions
@@ -44,8 +44,8 @@
  * any receiver's applicable license agreements with MediaTek Inc.
  */
 
-#include "executorch/backends/mediatek/runtime/include/NeuronBufferAllocator.h"
 #include <executorch/examples/mediatek/executor_runner/mtk_llama_runner.h>
+#include "executorch/backends/mediatek/runtime/include/NeuronBufferAllocator.h"
 
 #include <ctime>
 #include <iostream>
@@ -65,8 +65,8 @@
 
 #include "llama_runner/ModelChunk.h"
 #include "llama_runner/Utils.h"
-#include "llama_runner/llm_helper/include/llm_types.h"
 #include "llama_runner/llm_helper/include/llama_runner_values.h"
+#include "llama_runner/llm_helper/include/llm_types.h"
 
 static uint64_t MAX_RESPONSE = 50; // Maximum number of tokens to generate.
 // Global BOS and EOS option for tokenization (encoding)
@@ -83,15 +83,14 @@ using namespace mtk::vars;
 namespace llm = ::executorch::extension::llm;
 
 MTKLlamaRunner::MTKLlamaRunner(
-    const std::string& model_path,
-    const std::string& tokenizer_path,
-    const float temperature)
-    : modeloptions_(get_model_options()),
-      modelpaths_(get_model_paths()) {
+    const std::string& model_path,
+    const std::string& tokenizer_path,
+    const float temperature)
+    : modeloptions_(get_model_options()), modelpaths_(get_model_paths()) {
   executorch::runtime::runtime_init();
   ET_LOG(
-      Info,
-      "Creating MTK Llama runner. Current it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
+      Info,
+      "Creating MTK Llama runner. Current it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
 }
 
 Error MTKLlamaRunner::load() {
@@ -122,7 +121,6 @@ Error MTKLlamaRunner::generate(
     int32_t seq_len,
     std::function<void(const std::string&)> token_callback,
    std::function<void(const Stats&)> stats_callback) {
-
   if (!is_loaded()) {
     ET_CHECK_OK_OR_RETURN_ERROR(load());
   }
@@ -137,9 +135,9 @@ Error MTKLlamaRunner::generate(
     }
   };
 
-  ET_LOG(Info, "Starting inference from MTKLlamaRunner");
+  ET_LOG(Info, "Starting inference from MTKLlamaRunner");
   inference(*runtime_.get(), tokenizer_, prompt, wrapped_callback);
-  ET_LOG(Info, "Completed inference from MTKLlamaRunner");
+  ET_LOG(Info, "Completed inference from MTKLlamaRunner");
 
   return Error::Ok;
 }
@@ -169,7 +167,7 @@ LlamaModelOptions MTKLlamaRunner::get_model_options() {
       .cache_type = CACHE_TYPE,
       .mask_type = MASK_TYPE,
       .rot_emb_type = ROT_EMB_TYPE};
-  ET_LOG(Info, "Completed get_model_options");
+  ET_LOG(Info, "Completed get_model_options");
   return options;
 }
 
@@ -179,7 +177,7 @@ LlamaModelPaths MTKLlamaRunner::get_model_paths() {
      .token_embedding_path = TOKEN_EMBEDDING_PATH,
       .prompt_model_paths = split(PROMPT_MODEL_PATHS, ','),
       .gen_model_paths = split(GEN_MODEL_PATHS, ',')};
-  ET_LOG(Info, "Completed get_model_paths");
+  ET_LOG(Info, "Completed get_model_paths");
   return model_paths;
 }
 
@@ -325,7 +323,8 @@ Error MTKLlamaRunner::inference(
   const auto first_output_token = prefill_res.get();
 
   // run generation mode (decoding)
-  return gen_response(llama_runtime, tokenizer, first_output_token, token_callback);
+  return gen_response(
+      llama_runtime, tokenizer, first_output_token, token_callback);
 }
 
 std::unique_ptr<Tokenizer> MTKLlamaRunner::load_tokenizer() {
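Context for the get_model_paths() hunk above: the comma-separated path constants from llama_runner_values.h are turned into per-chunk path lists via the split() helper from llama_runner/Utils.h. A minimal sketch of the behavior that usage implies, where split_paths is a hypothetical stand-in for the actual helper and the handling of empty segments (such as the one left by each string's trailing comma) is an assumption:

    #include <sstream>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for split() from llama_runner/Utils.h: breaks
    // "chunk_0.pte,chunk_1.pte," into {"chunk_0.pte", "chunk_1.pte"},
    // skipping any empty segment, e.g. one produced by a doubled delimiter.
    std::vector<std::string> split_paths(const std::string& csv, char delim) {
      std::vector<std::string> parts;
      std::stringstream ss(csv);
      std::string item;
      while (std::getline(ss, item, delim)) {
        if (!item.empty()) {
          parts.push_back(item);
        }
      }
      return parts;
    }

With PROMPT_MODEL_PATHS as defined in llama_runner_values.h, a helper like this would yield the four prompt-chunk .pte paths in order.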

examples/mediatek/executor_runner/mtk_llama_runner.h

Lines changed: 4 additions & 5 deletions
@@ -11,14 +11,14 @@
 
 #pragma once
 
+#include <executorch/examples/models/llama2/tokenizer/llama_tiktoken.h>
+#include <executorch/extension/llm/runner/stats.h>
+#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
+#include <executorch/extension/llm/tokenizer/tiktoken.h>
 #include <cstdint>
 #include <functional>
 #include <memory>
 #include <string>
-#include <executorch/extension/llm/runner/stats.h>
-#include <executorch/examples/models/llama2/tokenizer/llama_tiktoken.h>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
-#include <executorch/extension/llm/tokenizer/tiktoken.h>
 
 #include "llama_runner/LlamaConfig.h"
 #include "llama_runner/LlamaRuntime.h"
@@ -65,7 +65,6 @@ class MTKLlamaRunner {
       std::function<void(const std::string&)> token_callback);
   std::unique_ptr<Tokenizer> load_tokenizer();
 
-
  private:
   // model
   const LlamaModelOptions modeloptions_;

extension/android/jni/jni_layer_llama.cpp

Lines changed: 3 additions & 3 deletions
@@ -119,9 +119,9 @@ class ExecuTorchLlamaJni
           temperature);
     } else if (model_type_category == MODEL_TYPE_MEDIATEK_LLAMA) {
       mtk_llama_runner_ = std::make_unique<MTKLlamaRunner>(
-          model_path->toStdString().c_str(),
-          tokenizer_path->toStdString().c_str(),
-          temperature);
+          model_path->toStdString().c_str(),
+          tokenizer_path->toStdString().c_str(),
+          temperature);
     }
   }
