Commit 47bca20

MTK Android Llama Runner
Differential Revision: D64776059
Pull Request resolved: #6208
1 parent 1f38016 commit 47bca20

6 files changed: +502 −1 lines


build/build_android_llm_demo.sh

Lines changed: 2 additions & 0 deletions
@@ -38,6 +38,7 @@ build_android_native_library() {
   cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
     -DANDROID_ABI="${ANDROID_ABI}" \
+    -DANDROID_PLATFORM=android-26 \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_LOG_LEVEL=Info \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -66,6 +67,7 @@ build_android_native_library() {
   cmake extension/android \
     -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI="${ANDROID_ABI}" \
+    -DANDROID_PLATFORM=android-26 \
     -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_LOG_LEVEL=Info \
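
The flag added in both hunks, -DANDROID_PLATFORM=android-26, pins the NDK toolchain to a minimum Android API level of 26 (Android 8.0) for both the native library and the extension/android build. A minimal sketch of driving the patched script, assuming ANDROID_NDK and ANDROID_ABI are supplied through the environment (the script's full argument handling is not shown in this diff):

# Hedged example: environment assumed by the patched cmake invocations.
export ANDROID_NDK=/path/to/android-ndk   # provides build/cmake/android.toolchain.cmake
export ANDROID_ABI=arm64-v8a              # forwarded as -DANDROID_ABI
bash build/build_android_llm_demo.sh      # runs the cmake configure/build steps shown above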
New file (constants header used by mtk_llama_runner.cpp) — Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Contains values that are used by the mtk_llama_runner.cpp
+
+#pragma once
+
+namespace mtk::vars {
+using example::llm_helper::LLMType;
+
+// Sizes
+const size_t PROMPT_TOKEN_BATCH_SIZE = 128;
+const size_t CACHE_SIZE = 512;
+const size_t HIDDEN_SIZE = 4096;
+const size_t NUM_HEAD = 32;
+const size_t NUM_LAYER = 32;
+const size_t MAX_TOKEN_LENGTH = 8192;
+const double ROT_EMB_BASE = 500000;
+
+// Types
+const LLMType MODEL_INPUT_TYPE = LLMType::FP32;
+const LLMType MODEL_OUTPUT_TYPE = LLMType::FP32;
+const LLMType CACHE_TYPE = LLMType::FP32;
+const LLMType MASK_TYPE = LLMType::FP32;
+const LLMType ROT_EMB_TYPE = LLMType::FP32;
+
+// Paths
+const std::string TOKENIZER_PATH =
+    "/data/local/tmp/et-mtk/llama3/tokenizer.model";
+const std::string TOKEN_EMBEDDING_PATH =
+    "/data/local/tmp/et-mtk/llama3/embedding_llama3-8B-instruct_fp32.bin";
+
+// Comma-Separated Paths
+const std::string PROMPT_MODEL_PATHS =
+    "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_3.pte,";
+
+// Comma-Separated Paths
+const std::string GEN_MODEL_PATHS =
+    "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_3.pte,";
+
+} // namespace mtk::vars
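
The header above hard-codes on-device artifact locations under /data/local/tmp/et-mtk/llama3/: a tokenizer, an FP32 token-embedding file, four prompt-mode model chunks (128-token prompt batch and 512-token cache, matching PROMPT_TOKEN_BATCH_SIZE and CACHE_SIZE) and four gen-mode chunks (1-token decode, 512-token cache). A minimal staging sketch with adb, assuming the exported files already exist locally under the names listed in the header:

# Hedged example: push the Llama 3 8B artifacts to the paths expected by mtk::vars.
adb shell mkdir -p /data/local/tmp/et-mtk/llama3
adb push tokenizer.model /data/local/tmp/et-mtk/llama3/
adb push embedding_llama3-8B-instruct_fp32.bin /data/local/tmp/et-mtk/llama3/
# Prompt-mode (128t512c) and gen-mode (1t512c) chunks referenced by
# PROMPT_MODEL_PATHS and GEN_MODEL_PATHS:
for f in llama3-8B-instruct_A16W4_4_chunks_128t512c_{0..3}.pte \
         llama3-8B-instruct_A16W4_4_chunks_1t512c_{0..3}.pte; do
  adb push "$f" /data/local/tmp/et-mtk/llama3/
done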
