Commit 8f72f80

larryliu0820 authored and facebook-github-bot committed
Use new API to register custom ops for llama model (#2916)
Summary: Retry of D55713944

Differential Revision: D55856491
1 parent 6db9d72 commit 8f72f80

26 files changed: +439 -318 lines

.ci/scripts/test_llama.sh

Lines changed: 25 additions & 12 deletions

@@ -37,6 +37,18 @@ if [[ -z "${MODE:-}" ]]; then
   exit 1
 fi
 
+if [[ "${MODE}" =~ xnnpack.* ]]; then
+  XNNPACK=ON
+else
+  XNNPACK=OFF
+fi
+
+if [[ "${MODE}" =~ .*custom.* ]]; then
+  CUSTOM=ON
+else
+  CUSTOM=OFF
+fi
+
 if [[ -z "${BUCK:-}" ]]; then
   BUCK=buck2
 fi
@@ -47,38 +59,35 @@ fi
 
 which "${PYTHON_EXECUTABLE}"
 
-
 cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
-  if [[ "${MODE}" == "xnnpack" ]]; then
-    XNNPACK=ON
-  else
-    XNNPACK=OFF
-  fi
   retry cmake -DBUCK2="$BUCK" \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -Bcmake-out .
-  cmake --build cmake-out -j9 --target install --config Release
+  cmake --build cmake-out -j9 --target install --config Debug
 }
 
 cmake_build_llama_runner() {
   echo "Building llama runner"
   dir="examples/models/llama2"
   retry cmake -DBUCK2="$BUCK" \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -Bcmake-out/${dir} \
    ${dir}
-  cmake --build cmake-out/${dir} -j9 --config Release
+  cmake --build cmake-out/${dir} -j9 --config Debug
 
 }
 
@@ -113,13 +122,18 @@ else
   exit 1
 fi
 
+# Install custom ops before exporting
+echo "Installing executorch libraries"
+cmake_install_executorch_libraries
+
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
 EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
-if [[ "${MODE}" == "xnnpack" ]]; then
+if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
 fi
+# Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
 
 # Create tokenizer.bin.
@@ -135,7 +149,6 @@ if [[ "${BUILD_TOOL}" == "buck2" ]]; then
   # shellcheck source=/dev/null
   $BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
 elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
-  cmake_install_executorch_libraries
   cmake_build_llama_runner
   # Run llama runner
   NOW=$(date +"%H:%M:%S")
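
Note on the new mode handling: MODE is now matched with bash regexes rather than compared for string equality, so a compound value such as "xnnpack+kv+custom" can switch on both the XNNPACK backend and the custom-op build at once. A stand-alone sketch (bash only, illustrative, not part of the commit) of how the CI matrix values map onto the two flags:

#!/usr/bin/env bash
# Illustrative sketch of the MODE matching introduced above (not part of the commit).
for MODE in portable xnnpack+kv+custom; do
  if [[ "${MODE}" =~ xnnpack.* ]]; then XNNPACK=ON; else XNNPACK=OFF; fi
  if [[ "${MODE}" =~ .*custom.* ]]; then CUSTOM=ON; else CUSTOM=OFF; fi
  echo "MODE=${MODE} -> XNNPACK=${XNNPACK} CUSTOM=${CUSTOM}"
done
# Prints:
#   MODE=portable -> XNNPACK=OFF CUSTOM=OFF
#   MODE=xnnpack+kv+custom -> XNNPACK=ON CUSTOM=ON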

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion

@@ -90,7 +90,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack]
+        mode: [portable, xnnpack+kv+custom]
       fail-fast: false
     with:
       runner: linux.2xlarge

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion

@@ -254,7 +254,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack]
+        mode: [portable, xnnpack+kv+custom]
       fail-fast: false
     with:
       runner: macos-m1-stable
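
The same matrix change is applied to both the Linux and macOS workflows, so each dtype now fans out over the two build tools and the two modes. A quick bash sketch (illustrative only; the real expansion is done by GitHub Actions) of the job combinations this produces:

#!/usr/bin/env bash
# Enumerate the CI jobs produced by the updated matrix (illustration, not part of the commit).
for build_tool in buck2 cmake; do
  for mode in portable xnnpack+kv+custom; do
    echo "dtype=fp32 build-tool=${build_tool} mode=${mode}"
  done
done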

CMakeLists.txt

Lines changed: 47 additions & 33 deletions

@@ -175,8 +175,9 @@ option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
 #
 # pthreadpool: build pthreadpool library. Disable on unsupported platforms
 #
-cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
-                       ON "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
+cmake_dependent_option(
+  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
+  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
 
 #
 # cpuinfo: build cpuinfo library. Disable on unsupported platforms
@@ -186,6 +187,9 @@ cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
 
 if(EXECUTORCH_BUILD_CPUINFO)
   # --- cpuinfo
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
   set(CPUINFO_BUILD_TOOLS
       OFF
@@ -207,10 +211,15 @@ if(EXECUTORCH_BUILD_CPUINFO)
       CACHE STRING "")
   set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
   add_subdirectory("${CPUINFO_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
 endif()
 
 if(EXECUTORCH_BUILD_PTHREADPOOL)
   # --- pthreadpool
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
   set(PTHREADPOOL_BUILD_TESTS
       OFF
@@ -230,6 +239,8 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
       CACHE STRING "")
   endif()
   add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
 endif()
 
 if(NOT PYTHON_EXECUTABLE)
@@ -352,23 +363,25 @@ add_subdirectory(schema)
 # Only contains primitive operators; does not contain portable kernels or other
 # full operators. Does not contain any backends.
 #
-
-add_library(executorch ${_executorch__srcs})
-target_link_libraries(executorch PRIVATE program_schema)
-target_link_options_shared_lib(executorch)
+add_library(executorch_no_prim_ops ${_executorch_no_prim_ops__srcs})
+target_link_libraries(executorch_no_prim_ops PRIVATE program_schema)
 # Check if dl exists for this toolchain and only then link it.
 find_library(DL_LIBRARY_EXISTS NAMES dl)
 # Check if the library was found
 if(DL_LIBRARY_EXISTS)
-  target_link_libraries(executorch PRIVATE dl) # For dladdr()
+  target_link_libraries(executorch_no_prim_ops PRIVATE dl) # For dladdr()
 endif()
-target_include_directories(executorch PUBLIC ${_common_include_directories})
-target_compile_options(executorch PUBLIC ${_common_compile_options})
+target_include_directories(executorch_no_prim_ops PUBLIC ${_common_include_directories})
+target_compile_options(executorch_no_prim_ops PUBLIC ${_common_compile_options})
 if(MAX_KERNEL_NUM)
-  target_compile_definitions(executorch
+  target_compile_definitions(executorch_no_prim_ops
                              PRIVATE MAX_KERNEL_NUM=${MAX_KERNEL_NUM})
 endif()
 
+add_library(executorch ${_executorch__srcs})
+target_link_libraries(executorch PRIVATE executorch_no_prim_ops)
+target_link_options_shared_lib(executorch)
+
 #
 # portable_ops_lib: A library to register core ATen ops using portable kernels,
 # see kernels/portable/CMakeLists.txt.
@@ -504,25 +517,38 @@ if(EXECUTORCH_BUILD_PYBIND)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
   endif()
 
+  # find pytorch lib, to allow pybind to take at::Tensor as input/output
+  find_package(Torch CONFIG REQUIRED)
+  find_library(TORCH_PYTHON_LIBRARY torch_python
+               PATHS "${TORCH_INSTALL_PREFIX}/lib")
+
+  set(_dep_libs
+      ${TORCH_PYTHON_LIBRARY}
+      bundled_program
+      etdump
+      executorch
+      extension_data_loader
+      portable_ops_lib
+      util
+      torch)
+
   if(EXECUTORCH_BUILD_COREML)
-    set(PYBIND_LINK_COREML "coremldelegate")
+    list(APPEND _dep_libs coremldelegate)
   endif()
 
   if(EXECUTORCH_BUILD_MPS)
-    set(PYBIND_LINK_MPS "mpsdelegate")
+    list(APPEND _dep_libs mpsdelegate)
   endif()
 
   if(EXECUTORCH_BUILD_XNNPACK)
-    # need to explicitly specify XNNPACK here
-    # otherwise uses XNNPACK symbols from libtorch_cpu
-    set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
+    # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
+    # from libtorch_cpu
+    list(APPEND _dep_libs xnnpack_backend XNNPACK)
   endif()
 
-  # find pytorch lib, to allow pybind to take at::Tensor as input/output
-  find_package(Torch CONFIG REQUIRED)
-  find_library(TORCH_PYTHON_LIBRARY torch_python
-               PATHS "${TORCH_INSTALL_PREFIX}/lib")
-
+  if(EXECUTORCH_BUILD_CUSTOM)
+    list(APPEND _dep_libs custom_ops custom_ops_aot_lib)
+  endif()
   # compile options for pybind
 
   set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
@@ -544,19 +570,7 @@ if(EXECUTORCH_BUILD_PYBIND)
     PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
   target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
   target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
-  target_link_libraries(
-    portable_lib
-    PUBLIC ${TORCH_PYTHON_LIBRARY}
-           bundled_program
-           etdump
-           executorch
-           extension_data_loader
-           portable_ops_lib
-           util
-           torch
-           ${PYBIND_LINK_COREML}
-           ${PYBIND_LINK_MPS}
-           ${PYBIND_LINK_XNNPACK})
+  target_link_libraries(portable_lib PUBLIC ${_dep_libs})
 
   install(TARGETS portable_lib
           LIBRARY DESTINATION executorch/extension/pybindings)
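
Taken together, these CMake changes split the core runtime into executorch_no_prim_ops plus a thin executorch wrapper, and gate the custom-op libraries (custom_ops, custom_ops_aot_lib) behind EXECUTORCH_BUILD_CUSTOM so the pybind portable_lib can link them. A hedged configure-and-build sketch using only options that appear in this diff (assumed invocation mirroring .ci/scripts/test_llama.sh; your checkout may need additional options such as the module or data-loader extensions):

#!/usr/bin/env bash
# Illustrative local build enabling the new custom-op path (not the commit's own command).
cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
      -DCMAKE_BUILD_TYPE=Release \
      -DEXECUTORCH_BUILD_CUSTOM=ON \
      -DEXECUTORCH_BUILD_XNNPACK=ON \
      -DEXECUTORCH_BUILD_PYBIND=ON \
      -Bcmake-out .
cmake --build cmake-out -j9 --target install --config Release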

build/cmake_deps.toml

Lines changed: 12 additions & 0 deletions

@@ -19,6 +19,18 @@ excludes = [
 buck_targets = [
   "//runtime/executor:program",
 ]
+deps = [
+  "executorch_no_prim_ops",
+]
+filters = [
+  ".cpp$",
+]
+
+
+[targets.executorch_no_prim_ops]
+buck_targets = [
+  "//runtime/executor:program_no_prim_ops",
+]
 deps = [
   "program_schema",
 ]

examples/demo-apps/android/LlamaDemo/setup.sh

Lines changed: 1 addition & 0 deletions

@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_CUSTOM=ON \
     -DCMAKE_BUILD_TYPE=Release \
     -B"${CMAKE_OUT}"
