
MTK build test #5301


Closed · wants to merge 23 commits

Commits (23)
0d55652  Update Android build for MTK (kirklandsign, Sep 4, 2024)
4ce04e8  Install neuron_backend in executorch-config (kirklandsign, Sep 4, 2024)
902569c  Create a MTK Runner to be able to run with a mobile app (cmodi-meta, Aug 29, 2024)
b21f1f6  TEST ONLY try to build mtk stuff (kirklandsign, Sep 4, 2024)
72428f5  Fix Runner compilation errors (cmodi-meta, Sep 5, 2024)
626858e  Debug in progress (kirklandsign, Sep 5, 2024)
7242dec  find lib (kirklandsign, Sep 5, 2024)
605c0dd  aar generates but with libneuron_backend.so error (cmodi-meta, Sep 11, 2024)
0c2d041  trying to fix libneuron_backend.so error (cmodi-meta, Sep 11, 2024)
fda0f62  includes libneuron_backend.so error but issue with neuron_backend (cmodi-meta, Sep 12, 2024)
8cab38e  resolves neuron_backend issue but now issue on .so or properly linkin… (cmodi-meta, Sep 12, 2024)
f28a2a9  Merge remote-tracking branch 'origin/main' into mtk-3 (kirklandsign, Sep 12, 2024)
e5166ac  hack route to mtk runner (kirklandsign, Sep 12, 2024)
22d9b11  Make 1 until cmodi changes java side lol (kirklandsign, Sep 12, 2024)
6107dc7  compilation fixes (cmodi-meta, Sep 12, 2024)
1aa2ccd  resolved no op issue and bug in modelType. Issue with loading models (cmodi-meta, Sep 12, 2024)
6fed4cb  *MILESTONE* debugging logs for model loading in aar (cmodi-meta, Sep 12, 2024)
8a2d9c1  debugging with moving some to shell (cmodi-meta, Sep 13, 2024)
72aa142  many debug prints (cmodi-meta, Sep 16, 2024)
91e4d6a  logs and app changes for debug (cmodi-meta, Sep 17, 2024)
aa98fb1  so adds in androidmanifest and embedding file name correction (cmodi-meta, Sep 17, 2024)
29f3333  .so adds in AndroidManifest and fix type for embedding file name (cmodi-meta, Sep 27, 2024)
a4b4279  add Error returns to runner. Baseline working flow. (cmodi-meta, Sep 27, 2024)

Files changed

2 changes: 1 addition & 1 deletion backends/mediatek/CMakeLists.txt
@@ -25,10 +25,10 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/include)

# targets
add_library(neuron_backend SHARED)
target_compile_options(neuron_backend PRIVATE "-frtti" "-fexceptions")
target_link_libraries(neuron_backend
PRIVATE
- executorch_no_prim_ops
+ portable_ops_lib
android
log
${NEURON_BUFFER_ALLOCATOR_LIB}
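
Several of the commits above chase a libneuron_backend.so packaging/loading error. A quick diagnostic is to inspect the DT_NEEDED entries of the built backend, since every library listed there must either ship inside the AAR's jni/&lt;ABI&gt;/ directory or already exist on the device. This is only a sketch; CMAKE_OUT is whatever build tree build_android_llm_demo.sh used.

```bash
# Sketch: list the runtime dependencies of the MTK backend library.
# CMAKE_OUT here is an assumption; point it at the build tree used by the script below.
CMAKE_OUT="${CMAKE_OUT:-cmake-out-android}"
readelf -d "${CMAKE_OUT}/backends/mediatek/libneuron_backend.so" | grep NEEDED
```
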
2 changes: 1 addition & 1 deletion backends/mediatek/runtime/include/NeuronLog.h
@@ -8,7 +8,7 @@

#pragma once

- #include <api/NeuronAdapter.h>
+ #include "api/NeuronAdapter.h"

#include <android/log.h>
#include <sys/system_properties.h>
23 changes: 16 additions & 7 deletions build/build_android_llm_demo.sh
@@ -30,7 +30,7 @@ build_android_native_library() {
cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
-DANDROID_ABI="${ANDROID_ABI}" \
- -DANDROID_PLATFORM=android-23 \
+ -DANDROID_PLATFORM=android-26 \
-DEXECUTORCH_ENABLE_LOGGING=ON \
-DEXECUTORCH_LOG_LEVEL=Info \
-DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -42,36 +42,44 @@ build_android_native_library() {
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+ -DEXECUTORCH_BUILD_NEURON=ON \
+ -DNEURON_BUFFER_ALLOCATOR_LIB="$NEURON_BUFFER_ALLOCATOR_LIB" \
-DEXECUTORCH_BUILD_QNN="${EXECUTORCH_BUILD_QNN}" \
-DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
- -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-B"${CMAKE_OUT}"

if [ "$(uname)" == "Darwin" ]; then
CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))
else
CMAKE_JOBS=$(( $(nproc) - 1 ))
fi
- cmake --build "${CMAKE_OUT}" -j "${CMAKE_JOBS}" --target install --config Release
+ cmake --build "${CMAKE_OUT}" -j "${CMAKE_JOBS}" --target install --config RelWithDebInfo

cmake extension/android \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
-DANDROID_ABI="${ANDROID_ABI}" \
- -DANDROID_PLATFORM=android-23 \
+ -DANDROID_PLATFORM=android-26 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DEXECUTORCH_ENABLE_LOGGING=ON \
-DEXECUTORCH_LOG_LEVEL=Info \
-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
+ -DNEURON_BUFFER_ALLOCATOR_LIB="$NEURON_BUFFER_ALLOCATOR_LIB" \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_LLAMA_JNI=ON \
- -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-B"${CMAKE_OUT}"/extension/android

- cmake --build "${CMAKE_OUT}"/extension/android -j "${CMAKE_JOBS}" --config Release
+ cmake --build "${CMAKE_OUT}"/extension/android -j "${CMAKE_JOBS}" --config RelWithDebInfo

# Copy artifacts to ABI specific directory
mkdir -p "${BUILD_AAR_DIR}/jni/${ANDROID_ABI}"
cp "${CMAKE_OUT}"/extension/android/*.so "${BUILD_AAR_DIR}/jni/${ANDROID_ABI}/"

+ cp "${CMAKE_OUT}"/backends/mediatek/libneuron_backend.so ${BUILD_AAR_DIR}/jni/${ANDROID_ABI}/
+ cp /Users/cmodi/Documents/ai/clean/executorch/backends/mediatek/libneuron_buffer_allocator.so ${BUILD_AAR_DIR}/jni/${ANDROID_ABI}/
+ cp /Users/cmodi/Documents/ai/clean/executorch/backends/mediatek/libneuronusdk_adapter.mtk.so ${BUILD_AAR_DIR}/jni/${ANDROID_ABI}/

# Copy QNN related so library
if [ -n "$QNN_SDK_ROOT" ] && [ "$ANDROID_ABI" == "arm64-v8a" ]; then
cp "${CMAKE_OUT}"/lib/libqnn_executorch_backend.so "${BUILD_AAR_DIR}/jni/${ANDROID_ABI}/"
@@ -97,7 +105,7 @@ build_aar() {
find jni -type f -name "libexecutorch_jni.so" -exec bash -c 'mv "$1" "${1/_jni/}"' bash {} \;
# Zip all necessary files into the AAR file
zip -r executorch.aar libs jni/*/libexecutorch.so jni/*/libqnn*.so jni/*/libQnn*.so AndroidManifest.xml
- zip -r executorch-llama.aar libs jni/*/libexecutorch.so jni/*/libqnn*.so jni/*/libQnn*.so AndroidManifest.xml
+ zip -r executorch-llama.aar libs jni/*/libexecutorch.so jni/*/libqnn*.so jni/*/libQnn*.so jni/*/libneuron_backend.so jni/*/libneuron_buffer_allocator.so jni/*/libneuronusdk_adapter.mtk.so AndroidManifest.xml
popd
}

@@ -143,6 +151,7 @@ BUILD_AAR_DIR="$(mktemp -d)"
export BUILD_AAR_DIR
if [ -z "$ANDROID_ABIS" ]; then
ANDROID_ABIS=("arm64-v8a" "x86_64")
+ ANDROID_ABIS=("arm64-v8a")
fi
export ANDROID_ABIS

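
The three cp lines above hardcode paths under /Users/cmodi/..., so the script only works on one machine. A follow-up could take the MTK library location from an environment variable, the same way QNN_SDK_ROOT is handled. A rough sketch; NEURON_LIB_DIR is a made-up variable, not something the script defines today:

```bash
# Sketch: copy the MTK runtime libraries from a configurable location instead of a
# hardcoded home directory. NEURON_LIB_DIR is hypothetical and is expected to contain
# libneuron_buffer_allocator.so and libneuronusdk_adapter.mtk.so.
if [ -n "${NEURON_LIB_DIR:-}" ]; then
  cp "${CMAKE_OUT}/backends/mediatek/libneuron_backend.so" "${BUILD_AAR_DIR}/jni/${ANDROID_ABI}/"
  cp "${NEURON_LIB_DIR}/libneuron_buffer_allocator.so" "${BUILD_AAR_DIR}/jni/${ANDROID_ABI}/"
  cp "${NEURON_LIB_DIR}/libneuronusdk_adapter.mtk.so" "${BUILD_AAR_DIR}/jni/${ANDROID_ABI}/"
fi
```
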
1 change: 1 addition & 0 deletions build/executorch-config.cmake
@@ -41,6 +41,7 @@ set(lib_list
${FLATCCRT_LIB}
coremldelegate
mpsdelegate
+ neuron_backend
qnn_executorch_backend
portable_ops_lib
extension_module
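
Adding neuron_backend to lib_list makes the installed executorch-config.cmake resolve and export the MTK backend alongside the other delegates. A consumer CMake project would then pick it up roughly like this (a sketch; the runner target and source file are placeholders):

```cmake
# Sketch: link a native runner against the installed ExecuTorch package, including the
# MediaTek backend registered above. Names other than neuron_backend are illustrative.
find_package(executorch REQUIRED CONFIG PATHS "${CMAKE_INSTALL_PREFIX}")

add_executable(mtk_llama_runner main.cpp)
target_link_libraries(mtk_llama_runner PRIVATE executorch neuron_backend)
```
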
Changes to the demo app's Gradle build file (dependencies)
@@ -57,7 +57,7 @@ dependencies {
implementation("androidx.constraintlayout:constraintlayout:2.2.0-alpha12")
implementation("com.facebook.fbjni:fbjni:0.5.1")
implementation("com.google.code.gson:gson:2.8.6")
- implementation(files("libs/executorch-llama.aar"))
+ implementation(files("libs/executorch-llama-mtk31.aar"))
implementation("com.google.android.material:material:1.12.0")
implementation("androidx.activity:activity:1.9.0")
testImplementation("junit:junit:4.13.2")
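
The dependency now expects libs/executorch-llama-mtk31.aar, so the AAR produced by build_android_llm_demo.sh has to be copied into the app module under that exact name, for example (the destination path is an assumption about the demo app layout):

```bash
# Sketch: stage the freshly built AAR where the Gradle dependency above looks for it.
cp "${BUILD_AAR_DIR}/executorch-llama.aar" \
   examples/demo-apps/android/LlamaDemo/app/libs/executorch-llama-mtk31.aar
```
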
Changes to the demo app's AndroidManifest.xml
@@ -36,6 +36,30 @@
android:name="libcdsprpc.so"
android:required="false" />

+ <uses-native-library
+     android:name="libapuwareutils_v2.mtk.so"
+     android:required="false" />
+
+ <uses-native-library
+     android:name="libapuwareapusys_v2.mtk.so"
+     android:required="false" />
+
+ <uses-native-library
+     android:name="libnir_neon_driver_ndk.mtk.so"
+     android:required="false" />
+
+ <uses-native-library
+     android:name="libnir_neon_driver_ndk.mtk.vndk.so"
+     android:required="false" />
+
+ <uses-native-library
+     android:name="libcmdl_ndk.mtk.vndk.so"
+     android:required="false" />
+
+ <uses-native-library
+     android:name="libcmdl_ndk.mtk.so"
+     android:required="false" />

<activity
android:name=".MainActivity"
android:exported="true"
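
These uses-native-library entries matter because an app that targets Android 12 (API 31) or newer must declare any vendor-supplied native shared library it wants to load; undeclared libraries are not visible to the app's linker namespace. With android:required="false" the app still installs on devices without MediaTek silicon, so a runtime probe is useful before choosing the NPU path. A minimal sketch, assuming the helper name and the fallback behavior:

```java
// Sketch: check at runtime whether the declared MTK user-space driver is actually
// present, so the app can fall back to a CPU/XNNPACK path on non-MediaTek devices.
private boolean isNeuronAdapterAvailable() {
  try {
    // Maps to libneuronusdk_adapter.mtk.so, one of the libraries declared above.
    System.loadLibrary("neuronusdk_adapter.mtk");
    return true;
  } catch (UnsatisfiedLinkError e) {
    return false;
  }
}
```
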
Changes to MainActivity.java (demo app)
@@ -126,6 +126,7 @@ private void setLocalModel(String modelPath, String tokenizerPath, float tempera
modelPath,
tokenizerPath,
temperature);
ETLogging.getInstance().log("ModelType is: " + mCurrentSettingsFields.getModelType());
int loadResult = mModule.load();
long loadDuration = System.currentTimeMillis() - runStartTime;
String modelLoadError = "";
@@ -156,11 +157,11 @@ private void setLocalModel(String modelPath, String tokenizerPath, float tempera
+ " sec."
+ " You can send text or image for inference";

- if (mCurrentSettingsFields.getModelType() == ModelType.LLAVA_1_5) {
+ /*if (mCurrentSettingsFields.getModelType() == ModelType.LLAVA_1_5) {
ETLogging.getInstance().log("Llava start prefill prompt");
startPos = mModule.prefillPrompt(PromptFormat.getLlavaPresetPrompt(), 0, 1, 0);
ETLogging.getInstance().log("Llava completes prefill prompt");
- }
+ }*/
}

Message modelLoadedMessage = new Message(modelInfo, false, MessageType.SYSTEM, 0);
@@ -226,6 +227,9 @@ protected void onCreate(Bundle savedInstanceState) {

try {
Os.setenv("ADSP_LIBRARY_PATH", getApplicationInfo().nativeLibraryDir, true);
Os.setenv("LD_LIBRARY_PATH", getApplicationInfo().nativeLibraryDir, true);
ETLogging.getInstance().log("cmodiiiii ADSP_LIBRARY_PATH is: " + Os.getenv("ADSP_LIBRARY_PATH"));
ETLogging.getInstance().log("cmodiiiii LD_LIBRARY_PATH is: " + Os.getenv("LD_LIBRARY_PATH"));
} catch (ErrnoException e) {
finish();
}
@@ -566,7 +570,7 @@ private void showMediaPreview(List<Uri> uris) {

// For LLava, we want to call prefill_image as soon as an image is selected
// Llava only support 1 image for now
- if (mCurrentSettingsFields.getModelType() == ModelType.LLAVA_1_5) {
+ /* if (mCurrentSettingsFields.getModelType() == ModelType.LLAVA_1_5) {
List<ETImage> processedImageList = getProcessedImagesForModel(mSelectedImageUri);
if (!processedImageList.isEmpty()) {
mMessageAdapter.add(
@@ -588,7 +592,7 @@ private void showMediaPreview(List<Uri> uris) {
};
executor.execute(runnable);
}
- }
+ }*/
}

private void addSelectedImagesToChatThread(List<Uri> selectedImageUri) {
@@ -689,24 +693,23 @@ public void run() {
}
});
long generateStartTime = System.currentTimeMillis();
- if (ModelUtils.getModelCategory(mCurrentSettingsFields.getModelType())
+ /* if (ModelUtils.getModelCategory(mCurrentSettingsFields.getModelType())
== ModelUtils.VISION_MODEL) {
mModule.generateFromPos(
mCurrentSettingsFields.getFormattedSystemAndUserPrompt(rawPrompt),
ModelUtils.VISION_MODEL_SEQ_LEN,
startPos,
MainActivity.this,
false);
- } else {
+ } else {*/
String finalPrompt =
getTotalFormattedPrompt(getConversationHistory(), rawPrompt);
ETLogging.getInstance().log("Running inference.. prompt=" + finalPrompt);
mModule.generate(
finalPrompt,
(int) (finalPrompt.length() * 0.75) + 64,
- MainActivity.this,
- false);
- }
+ MainActivity.this);
+ //}

long generateDuration = System.currentTimeMillis() - generateStartTime;
mResultMessage.setTotalGenerationTime(generateDuration);
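
The LLaVA branches above are disabled with block comments, which keeps the MTK text-only flow unblocked but makes the vision path easy to lose track of. If this is intended to land, a compile-time flag keeps the code building and documents why it is off. A sketch only; ENABLE_VISION_MODEL is a hypothetical constant, and the body reuses the calls already present in the diff:

```java
// Sketch: gate the vision-model path behind a flag instead of commenting it out.
private static final boolean ENABLE_VISION_MODEL = false; // MTK runner is text-only for now

if (ENABLE_VISION_MODEL && mCurrentSettingsFields.getModelType() == ModelType.LLAVA_1_5) {
  ETLogging.getInstance().log("Llava start prefill prompt");
  startPos = mModule.prefillPrompt(PromptFormat.getLlavaPresetPrompt(), 0, 1, 0);
  ETLogging.getInstance().log("Llava completes prefill prompt");
}
```
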
Changes to SettingsActivity.java (demo app)
@@ -46,6 +46,8 @@ public class SettingsActivity extends AppCompatActivity {

private DemoSharedPreferences mDemoSharedPreferences;
public static double TEMPERATURE_MIN_VALUE = 0.0;
+ public static String MODEL_PATH="/data/local/tmp/et-mtk/llama3";
+ //public static String MODEL_PATH="/data/local/tmp/llama";

@Override
protected void onCreate(Bundle savedInstanceState) {
@@ -286,7 +288,7 @@ private void showInvalidPromptDialog() {
}

private void setupModelSelectorDialog() {
- String[] pteFiles = listLocalFile("/data/local/tmp/llama/", ".pte");
+ String[] pteFiles = listLocalFile(MODEL_PATH, ".pte");
AlertDialog.Builder modelPathBuilder = new AlertDialog.Builder(this);
modelPathBuilder.setTitle("Select model path");

@@ -342,7 +344,7 @@ private void setupModelTypeSelectorDialog() {
}

private void setupTokenizerSelectorDialog() {
- String[] binFiles = listLocalFile("/data/local/tmp/llama/", ".bin");
+ String[] binFiles = listLocalFile(MODEL_PATH, ".bin");
String[] tokenizerFiles = new String[binFiles.length];
System.arraycopy(binFiles, 0, tokenizerFiles, 0, binFiles.length);
AlertDialog.Builder tokenizerPathBuilder = new AlertDialog.Builder(this);
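
MODEL_PATH is now hardcoded to /data/local/tmp/et-mtk/llama3, so the exported model and tokenizer have to be staged there before the selector dialogs can list anything. For example (the file names are placeholders for whatever the MTK export produced):

```bash
# Sketch: push MTK llama artifacts to the directory SettingsActivity scans.
adb shell mkdir -p /data/local/tmp/et-mtk/llama3
adb push llama3_mtk.pte /data/local/tmp/et-mtk/llama3/
adb push tokenizer.bin /data/local/tmp/et-mtk/llama3/
```
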
Changes to LlamaModelChunk.cpp (MTK Llama runner)
@@ -73,16 +73,26 @@ size_t LlamaModelChunk::GetExpectedOutputCount() const {
}

void LlamaModelChunk::Initialize() {
ET_LOG(Info, "cmodiii in LlamaModelChunk::Initialize");
LoadModels();
ET_LOG(Info, "cmodiii after LoadModels");
GetModelIoInfo();
ET_LOG(Info, "cmodiii after GetModelIoInfo");
CheckIoCount();
ET_LOG(Info, "cmodiii after CheckIoCount");
PrepareCacheIOs();
ET_LOG(Info, "cmodiii after PrepareCacheIOs");
AllocateIoBuffers();
ET_LOG(Info, "cmodiii after AllocateIoBuffers");
InitMaskBuilder();
ET_LOG(Info, "cmodiii after InitMaskBuilder");
InitCache();
ET_LOG(Info, "cmodiii after InitCache");

SetBackendInputs();
ET_LOG(Info, "cmodiii after SetBackendInputs");
SetBackendOutputs();
ET_LOG(Info, "cmodiii after SetBackendOutputs");
mIsInitialized = true;
}

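
The ET_LOG calls above are breadcrumbs for pinpointing where initialization crashes inside the AAR. If some of this tracing is worth keeping after debugging, an RAII helper gives the same before/after trail without repeating the literal strings, and it also logs on early exit. A sketch, not part of the existing code; the header path is the one ExecuTorch uses for ET_LOG:

```cpp
// Sketch: RAII tracer that logs entry and exit of each initialization step,
// replacing the hand-written "after X" log lines.
#include <executorch/runtime/platform/log.h>

class ScopedInitTrace {
 public:
  explicit ScopedInitTrace(const char* step) : mStep(step) {
    ET_LOG(Info, "LlamaModelChunk::Initialize: begin %s", mStep);
  }
  ~ScopedInitTrace() {
    ET_LOG(Info, "LlamaModelChunk::Initialize: end %s", mStep);
  }

 private:
  const char* mStep;
};

// Usage inside LlamaModelChunk::Initialize():
//   { ScopedInitTrace trace("LoadModels"); LoadModels(); }
//   { ScopedInitTrace trace("GetModelIoInfo"); GetModelIoInfo(); }
```
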
Changes to LlamaRuntime.cpp (MTK Llama runner)
@@ -28,14 +28,21 @@ void LlamaRuntime::Initialize(
const size_t numCache = 2 * modelOptions.num_layer / numChunk;
ET_CHECK_MSG(numChunk > 0, "No model to initialize");

ET_LOG(Info, "cmodiii 1");
ET_LOG(Info, "cmodiii numChunk = %zu", numChunk);
ET_LOG(Info, "cmodiii numCache = %zu", numCache);

// Initialize rotary embedding master lookup table
const size_t rotEmbDim = modelOptions.hidden_size / modelOptions.num_head;
ET_LOG(Info, "cmodiii 2");
mRotEmbMasterLut = std::make_unique<llm_helper::RotaryEmbeddingMasterLut>(
modelOptions.rot_emb_type,
modelOptions.max_token_length,
rotEmbDim,
modelOptions.rot_emb_base);
ET_LOG(Info, "cmodiii 3");
mRotEmbMasterLut->generate();
ET_LOG(Info, "cmodiii 4");

constexpr size_t numRotEmbInputs = 1;
const bool usePromptModel = !modelPaths.prompt_model_paths.empty();
@@ -50,8 +57,10 @@ void LlamaRuntime::Initialize(
return;
modelPathMap[batchSize] = modelPaths[chunkIdx];
};
ET_LOG(Info, "cmodiii 5");
addModelPath(
modelPaths.prompt_model_paths, modelOptions.prompt_token_batch_size);
ET_LOG(Info, "cmodiii 6");
addModelPath(modelPaths.gen_model_paths, 1);
auto llamaChunk = std::make_unique<LlamaModelChunk>(
modelPathMap,
@@ -60,18 +69,43 @@ void LlamaRuntime::Initialize(
numCache,
numRotEmbInputs,
mRotEmbMasterLut.get());
ET_LOG(Info, "cmodiii 7");
if(llamaChunk.get() == nullptr) {
ET_LOG(Info, "cmodiii llamaChunk is null");
} else {
ET_LOG(Info, "cmodiii llamaChunk is not null");
}

mLlamaModelChunks.push_back(std::move(llamaChunk));

+ if(mLlamaModelChunks.empty()) {
+   ET_LOG(Info, "cmodiii mLlamaModelChunks is empty");
+ } else {
+   ET_LOG(Info, "cmodiii mLlamaModelChunks is not empty");
+ }

ET_LOG(Info, "cmodiii 8");
}

for (size_t i = 0; i < numChunk; i++) {
auto& modelChunk = mLlamaModelChunks[i];
+ if(modelChunk.get() == nullptr) {
+   ET_LOG(Info, "cmodiii modelChunk is null");
+ } else {
+   ET_LOG(Info, "cmodiii modelChunk is not null");
+ }
+ ET_LOG(Info, "cmodiii 9");
if (i > 0) {
const auto& prevModelChunk = mLlamaModelChunks[i - 1];
ET_LOG(Info, "cmodiii 9A");
modelChunk->SetInputBuffer(prevModelChunk->GetOutputBuffer());
ET_LOG(Info, "cmodiii 10");
}
modelChunk->Initialize();
ET_LOG(Info, "cmodiii 11");
// modelChunk->LogIoSummary();
}
ET_LOG(Info, "cmodiii 12");

// NOTE: Token embedding type here is assumed to follow the model input
// embedding type.
@@ -80,9 +114,13 @@
modelOptions.model_input_type,
modelOptions.hidden_size);

ET_LOG(Info, "cmodiii 13");

// Link first chunk emb input to token emb lut output
const auto& tokenEmbInput = mLlamaModelChunks.front()->GetInputBuffer();
ET_LOG(Info, "cmodiii 14");
mTokenEmbLut->setOutput(tokenEmbInput.data, tokenEmbInput.nbytes);
ET_LOG(Info, "cmodiii 15");
}

void LlamaRuntime::Release() {
@@ -201,4 +239,4 @@ const LlamaModelOptions& LlamaRuntime::GetModelOptions() const {
return mModelOptions;
}

- } // namespace torch::executor
+ } // namespace torch::executor