MTK Android Llama Runner #6208
@@ -0,0 +1,46 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Contains values that are used by the mtk_llama_runner.cpp

#pragma once

#include <string>
// Also needs the llm_helper header that defines example::llm_helper::LLMType
// (include path elided here).

namespace mtk::vars {
using example::llm_helper::LLMType;

// Sizes
const size_t PROMPT_TOKEN_BATCH_SIZE = 128;
const size_t CACHE_SIZE = 512;
const size_t HIDDEN_SIZE = 4096;
const size_t NUM_HEAD = 32;
const size_t NUM_LAYER = 32;
const size_t MAX_TOKEN_LENGTH = 8192;
const double ROT_EMB_BASE = 500000;
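
// Illustrative sizing note (not in the original file): assuming FP32 caches
// with one K and one V tensor of shape [CACHE_SIZE, HIDDEN_SIZE] per layer
// (i.e., no grouped-query sharing), the cache footprint would be
//   2 * NUM_LAYER * CACHE_SIZE * HIDDEN_SIZE * 4 bytes
//   = 2 * 32 * 512 * 4096 * 4 B = 512 MiB,
// consistent with CACHE_TYPE = LLMType::FP32 below.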

// Types
const LLMType MODEL_INPUT_TYPE = LLMType::FP32;
const LLMType MODEL_OUTPUT_TYPE = LLMType::FP32;
const LLMType CACHE_TYPE = LLMType::FP32;
const LLMType MASK_TYPE = LLMType::FP32;
const LLMType ROT_EMB_TYPE = LLMType::FP32;

// Paths
const std::string TOKENIZER_PATH =
    "/data/local/tmp/et-mtk/llama3/tokenizer.model";
const std::string TOKEN_EMBEDDING_PATH =
    "/data/local/tmp/et-mtk/llama3/embedding_llama3-8B-instruct_fp32.bin";

Review comment: Need to fix those.

Reply: For right now, the tokenizer path, token embedding path, and model paths will be hardcoded in the AAR. We will then make changes to see if we want a different flow.

// Comma-Separated Paths
const std::string PROMPT_MODEL_PATHS =
    "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_3.pte,";

// Comma-Separated Paths
const std::string GEN_MODEL_PATHS =
    "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_3.pte,";

} // namespace mtk::vars
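
How these comma-separated lists are consumed is not shown in this hunk; as a minimal sketch (hypothetical helper, not from the PR), a splitter would need to skip the empty entry left by the trailing comma in PROMPT_MODEL_PATHS and GEN_MODEL_PATHS:

#include <string>
#include <vector>

// Hypothetical helper: split a comma-separated list of .pte paths,
// skipping empty segments such as the one left by the trailing comma.
std::vector<std::string> splitPaths(const std::string& csv) {
  std::vector<std::string> paths;
  size_t start = 0;
  while (start < csv.size()) {
    size_t end = csv.find(',', start);
    if (end == std::string::npos) {
      end = csv.size();
    }
    if (end > start) { // drop empty segments
      paths.push_back(csv.substr(start, end - start));
    }
    start = end + 1;
  }
  return paths;
}

Applied to PROMPT_MODEL_PATHS, this would yield the four chunk files in order (suffixes _0 through _3), one per model chunk.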

Review comment: I thought we bumped to a more recent version?

Reply: It seems that 26 is needed here? I thought 30 was stricter.

Reply: 26 is needed here, since otherwise I get build errors like the ones attached.
[screenshot of build errors]