Skip to content

Commit b91c20b

Browse files
cccclai authored and facebook-github-bot committed
integrate coreml delegate to llama_main (#4160)
Summary:
Pull Request resolved: #4160

As title, build executorch library
```
cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
  -DCMAKE_BUILD_TYPE=Debug \
  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
  -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
  -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
  -DEXECUTORCH_BUILD_XNNPACK=OFF \
  -DEXECUTORCH_BUILD_COREML=ON \
  -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
  -Bcmake-out .
cmake --build cmake-out -j16 --target install --config DEBUG
```

build llama_main binary
```
cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
  -DCMAKE_BUILD_TYPE=Debug \
  -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
  -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
  -DEXECUTORCH_BUILD_COREML=ON \
  -Bcmake-out/examples/models/llama2 \
  examples/models/llama2
cmake --build cmake-out/examples/models/llama2 -j9 --config DEBUG
```

Run `llama_main` binary on mac
```
(executorch) chenlai@chenlai-mbp executorch % ./cmake-out/examples/models/llama2/llama_main --model_path coreml_llama2.pte --tokenizer_path /Users/chenlai/Documents/stories110M/tokenizer.bin
I 00:00:00.028804 executorch:cpuinfo_utils.cpp:62] Reading file /sys/devices/soc0/image_version
I 00:00:00.028844 executorch:cpuinfo_utils.cpp:78] Failed to open midr file /sys/devices/soc0/image_version
I 00:00:00.028847 executorch:cpuinfo_utils.cpp:158] Number of efficient cores 4
I 00:00:00.028849 executorch:main.cpp:65] Resetting threadpool with num threads = 6
I 00:00:00.033847 executorch:runner.cpp:53] Creating LLaMa runner: model_path=coreml_llama2.pte, tokenizer_path=/Users/chenlai/Documents/stories110M/tokenizer.bin
I 00:00:00.117320 executorch:runner.cpp:77] Reading metadata from model
I 00:00:00.117374 executorch:runner.cpp:130] get_n_bos: 1
I 00:00:00.117386 executorch:runner.cpp:130] get_n_eos: 1
I 00:00:00.117392 executorch:runner.cpp:130] get_max_seq_len: 128
I 00:00:00.117399 executorch:runner.cpp:130] use_kv_cache: 1
I 00:00:00.117407 executorch:runner.cpp:130] use_sdpa_with_kv_cache: 0
I 00:00:00.117412 executorch:runner.cpp:130] append_eos_to_prompt: 0
I 00:00:00.117413 executorch:runner.cpp:128] The model does not contain enable_dynamic_shape method, using default value 0
I 00:00:00.117415 executorch:runner.cpp:130] enable_dynamic_shape: 0
I 00:00:00.128599 executorch:runner.cpp:130] get_vocab_size: 512
I 00:00:00.128615 executorch:runner.cpp:130] get_bos_id: 1
I 00:00:00.128621 executorch:runner.cpp:130] get_eos_id: 2
PyTorchObserver {"prompt_tokens":9,"generated_tokens":118,"model_load_start_ms":1720221254434,"model_load_end_ms":1720221254528,"inference_start_ms":1720221254528,"inference_end_ms":1720221254817,"prompt_eval_end_ms":1720221254563,"first_token_ms":1720221254563,"aggregate_sampling_time_ms":9,"SCALING_FACTOR_UNITS_PER_SECOND":1000}
I 00:00:00.417257 executorch:runner.cpp:509] Prompt Tokens: 9 Generated Tokens: 118
I 00:00:00.417260 executorch:runner.cpp:515] Model Load Time: 0.094000 (seconds)
I 00:00:00.417266 executorch:runner.cpp:525] Total inference time: 0.289000 (seconds) Rate: 408.304498 (tokens/second)
I 00:00:00.417268 executorch:runner.cpp:533] Prompt evaluation: 0.035000 (seconds) Rate: 257.142857 (tokens/second)
I 00:00:00.417270 executorch:runner.cpp:544] Generated 118 tokens: 0.254000 (seconds) Rate: 464.566929 (tokens/second)
I 00:00:00.417272 executorch:runner.cpp:552] Time to first generated token: 0.035000 (seconds)
I 00:00:00.417274 executorch:runner.cpp:559] Sampling time over 127 tokens: 0.009000 (seconds)
```

ghstack-source-id: 232863706
Reviewed By: kirklandsign
Differential Revision: D59412309
fbshipit-source-id: a31ac59e616d7333323a5c7961b8b7dafe2d45e5
1 parent e4eeadc commit b91c20b

File tree

3 files changed

+22
-0
lines changed

3 files changed

+22
-0
lines changed

backends/apple/coreml/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,10 @@ set(TARGET coremldelegate APPEND_STRING PROPERTY COMPILE_FLAGS
187187
# NOTE(review): as rendered here this is a plain set() call, which assigns the
# remaining arguments to a variable named TARGET; it does not modify any
# property of the coremldelegate target. The argument layout matches
# set_property(TARGET ... APPEND_STRING PROPERTY COMPILE_FLAGS ...), which may
# be what was intended -- confirm against the full file.
set(TARGET coremldelegate APPEND_STRING PROPERTY COMPILE_FLAGS
188188
"-Wno-receiver-expr"
189189
)
190+
191+
# Install rule for the coremldelegate target: its artifacts go to the `lib`
# directory, relative to the install prefix.
# NOTE(review): CMake documents INCLUDES DESTINATION as affecting only the
# INTERFACE_INCLUDE_DIRECTORIES of targets exported through install(EXPORT).
# No EXPORT is visible in this hunk, and ${_common_include_directories} is
# defined outside it -- confirm this clause has the intended effect.
install(
192+
TARGETS coremldelegate
193+
DESTINATION lib
194+
INCLUDES
195+
DESTINATION ${_common_include_directories}
196+
)

build/executorch-config.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ set(lib_list
3939
bundled_program
4040
extension_data_loader
4141
${FLATCCRT_LIB}
42+
coremldelegate
4243
mpsdelegate
4344
qnn_executorch_backend
4445
portable_ops_lib

examples/models/llama2/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,20 @@ if(TARGET mpsdelegate)
189189
target_link_options_shared_lib(mpsdelegate)
190190
endif()
191191

192+
# When a coremldelegate target exists, append it to the link_libraries list
# together with sqlite3 and the Foundation, CoreML and Accelerate frameworks,
# then pass the target to target_link_options_shared_lib().
# NOTE(review): find_library() stores its result in SQLITE_LIBRARY, but the
# list below appends the bare name `sqlite3`; the variable is not referenced
# in this hunk -- confirm whether ${SQLITE_LIBRARY} was meant to be linked.
# NOTE(review): target_link_options_shared_lib() is defined outside this hunk;
# presumably it applies whole-archive style link options, as in the
# mpsdelegate branch just above -- verify against its definition.
if(TARGET coremldelegate)
193+
find_library(SQLITE_LIBRARY sqlite3)
194+
list(
195+
APPEND
196+
link_libraries
197+
coremldelegate
198+
sqlite3
199+
"-framework Foundation"
200+
"-framework CoreML"
201+
"-framework Accelerate"
202+
)
203+
target_link_options_shared_lib(coremldelegate)
204+
endif()
205+
192206
# This one is needed for cpuinfo where it uses android specific log lib
193207
if(ANDROID)
194208
list(APPEND link_libraries log)

0 commit comments

Comments
 (0)