|
| 1 | +/* |
| 2 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * This source code is licensed under the BSD-style license found in the |
| 6 | + * LICENSE file in the root directory of this source tree. |
| 7 | + */ |
| 8 | + |
| 9 | +// Contains values that are used by the mtk_llama_runner.cpp |
| 10 | + |
| 11 | +#pragma once |
| 12 | + |
| 13 | +namespace mtk::vars { |
| 14 | +using example::llm_helper::LLMType; |
| 15 | + |
| 16 | +// Sizes |
| 17 | +const size_t PROMPT_TOKEN_BATCH_SIZE = 128; |
| 18 | +const size_t CACHE_SIZE = 512; |
| 19 | +const size_t HIDDEN_SIZE = 4096; |
| 20 | +const size_t NUM_HEAD = 32; |
| 21 | +const size_t NUM_LAYER = 32; |
| 22 | +const size_t MAX_TOKEN_LENGTH = 8192; |
| 23 | +const double ROT_EMB_BASE = 500000; |
| 24 | + |
| 25 | +// Types |
| 26 | +const LLMType MODEL_INPUT_TYPE = LLMType::FP32; |
| 27 | +const LLMType MODEL_OUTPUT_TYPE = LLMType::FP32; |
| 28 | +const LLMType CACHE_TYPE = LLMType::FP32; |
| 29 | +const LLMType MASK_TYPE = LLMType::FP32; |
| 30 | +const LLMType ROT_EMB_TYPE = LLMType::FP32; |
| 31 | + |
| 32 | +// Paths |
| 33 | +const std::string TOKENIZER_PATH = |
| 34 | + "/data/local/tmp/et-mtk/llama3/tokenizer.model"; |
| 35 | +const std::string TOKEN_EMBEDDING_PATH = |
| 36 | + "/data/local/tmp/et-mtk/llama3/embedding_llama3-8B-instruct_fp32.bin"; |
| 37 | + |
| 38 | +// Comma-Separated Paths |
| 39 | +const std::string PROMPT_MODEL_PATHS = |
| 40 | + "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_3.pte,"; |
| 41 | + |
| 42 | +// Comma-Separated Paths |
| 43 | +const std::string GEN_MODEL_PATHS = |
| 44 | + "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_3.pte,"; |
| 45 | + |
| 46 | +} // namespace mtk::vars |
0 commit comments