Consolidate EXECUTORCH_BUILD_CUSTOM option #2935

Closed · wants to merge 2 commits

Changes from all commits:

.ci/scripts/test_llama.sh (33 changes: 22 additions & 11 deletions)

@@ -37,6 +37,18 @@ if [[ -z "${MODE:-}" ]]; then
   exit 1
 fi

+if [[ "${MODE}" =~ xnnpack.* ]]; then
+  XNNPACK=ON
+else
+  XNNPACK=OFF
+fi
+
+if [[ "${MODE}" =~ .*custom.* ]]; then
+  CUSTOM=ON
+else
+  CUSTOM=OFF
+fi
+
 if [[ -z "${BUCK:-}" ]]; then
   BUCK=buck2
 fi

@@ -47,38 +59,36 @@ fi

 which "${PYTHON_EXECUTABLE}"

 cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
-  if [[ "${MODE}" == "xnnpack" ]]; then
-    XNNPACK=ON
-  else
-    XNNPACK=OFF
-  fi
   retry cmake -DBUCK2="$BUCK" \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=Debug \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .
-  cmake --build cmake-out -j9 --target install --config Release
+  cmake --build cmake-out -j9 --target install --config Debug
 }

 cmake_build_llama_runner() {
   echo "Building llama runner"
   dir="examples/models/llama2"
   retry cmake -DBUCK2="$BUCK" \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
+    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out/${dir} \
     ${dir}
-  cmake --build cmake-out/${dir} -j9 --config Release
+  cmake --build cmake-out/${dir} -j9 --config Debug

 }

@@ -117,9 +127,10 @@ fi
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
 EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
-if [[ "${MODE}" == "xnnpack" ]]; then
+if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
 fi
+# Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}

 # Create tokenizer.bin.

.github/workflows/pull.yml (2 changes: 1 addition & 1 deletion)

@@ -90,7 +90,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack]
+        mode: [portable, xnnpack+kv+custom]
       fail-fast: false
     with:
       runner: linux.2xlarge

.github/workflows/trunk.yml (2 changes: 1 addition & 1 deletion)

@@ -254,7 +254,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack]
+        mode: [portable, xnnpack+kv+custom]
       fail-fast: false
     with:
       runner: macos-m1-stable

CMakeLists.txt (55 changes: 31 additions & 24 deletions)

@@ -175,15 +175,20 @@ option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
 #
 # pthreadpool: build pthreadpool library. Disable on unsupported platforms
 #
-cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
-                       ON "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
+cmake_dependent_option(
+  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
+  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)

 #
 # cpuinfo: build cpuinfo library. Disable on unsupported platforms
 #
 cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
                        "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)

+if(EXECUTORCH_BUILD_CUSTOM)
+  set(EXECUTORCH_BUILD_OPTIMIZED ON)
+endif()
+
 if(EXECUTORCH_BUILD_CPUINFO)
   # --- cpuinfo
   set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")

@@ -508,24 +513,38 @@ if(EXECUTORCH_BUILD_PYBIND)
     add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
   endif()

+  # find pytorch lib, to allow pybind to take at::Tensor as input/output
+  find_package(Torch CONFIG REQUIRED)
+  find_library(TORCH_PYTHON_LIBRARY torch_python
+               PATHS "${TORCH_INSTALL_PREFIX}/lib")
+
+  set(_dep_libs
+      ${TORCH_PYTHON_LIBRARY}
+      bundled_program
+      etdump
+      executorch
+      extension_data_loader
+      portable_ops_lib
+      util
+      torch)
+
   if(EXECUTORCH_BUILD_COREML)
-    set(PYBIND_LINK_COREML "coremldelegate")
+    list(APPEND _dep_libs coremldelegate)
   endif()

   if(EXECUTORCH_BUILD_MPS)
-    set(PYBIND_LINK_MPS "mpsdelegate")
+    list(APPEND _dep_libs mpsdelegate)
   endif()

   if(EXECUTORCH_BUILD_XNNPACK)
-    # need to explicitly specify XNNPACK here
-    # otherwise uses XNNPACK symbols from libtorch_cpu
-    set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
+    # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
+    # from libtorch_cpu
+    list(APPEND _dep_libs xnnpack_backend XNNPACK)
   endif()

-  # find pytorch lib, to allow pybind to take at::Tensor as input/output
-  find_package(Torch CONFIG REQUIRED)
-  find_library(TORCH_PYTHON_LIBRARY torch_python
-               PATHS "${TORCH_INSTALL_PREFIX}/lib")
+  if(EXECUTORCH_BUILD_CUSTOM)
+    list(APPEND _dep_libs custom_ops_lib)
+  endif()

   # compile options for pybind

@@ -548,19 +567,7 @@
     PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
   target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
   target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
-  target_link_libraries(
-    portable_lib
-    PUBLIC ${TORCH_PYTHON_LIBRARY}
-           bundled_program
-           etdump
-           executorch
-           extension_data_loader
-           portable_ops_lib
-           util
-           torch
-           ${PYBIND_LINK_COREML}
-           ${PYBIND_LINK_MPS}
-           ${PYBIND_LINK_XNNPACK})
+  target_link_libraries(portable_lib PUBLIC ${_dep_libs})

   install(TARGETS portable_lib
           LIBRARY DESTINATION executorch/extension/pybindings)

examples/demo-apps/android/LlamaDemo/setup.sh (1 change: 1 addition & 0 deletions)

@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_CUSTOM=ON \
     -DCMAKE_BUILD_TYPE=Release \
     -B"${CMAKE_OUT}"

examples/models/llama2/CMakeLists.txt (98 changes: 73 additions & 25 deletions)

@@ -18,8 +18,23 @@
 cmake_minimum_required(VERSION 3.19)
 project(llama_runner)

+# Duplicating options as root CMakeLists.txt
+option(EXECUTORCH_BUILD_OPTIMIZED "Build the optimized kernels" OFF)
+
+include(CMakeDependentOption)
+#
+# pthreadpool: build pthreadpool library. Disable on unsupported platforms
+#
+cmake_dependent_option(
+  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
+  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
+#
+# cpuinfo: build cpuinfo library. Disable on unsupported platforms
+#
+cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
+                       "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)


 if(NOT PYTHON_EXECUTABLE)
   set(PYTHON_EXECUTABLE python3)
 endif()

@@ -49,55 +64,84 @@ set(_common_compile_options -Wno-deprecated-declarations -fPIC)
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)

-# For some reason android build is not able to find where gflags is
-# and hence cannot find corresponding .cmake file
+# For some reason android build is not able to find where gflags is and hence
+# cannot find corresponding .cmake file
 set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
 find_package(gflags REQUIRED)

 #
 # llama_main: test binary to run llama, with tokenizer and sampler integrated
 #
-add_executable(llama_main main.cpp
-               ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp)
-if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
-  target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
-endif()

-# find `executorch` libraries
-# Same as for gflags
+# find `executorch` libraries Same as for gflags
 set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
 find_package(executorch CONFIG REQUIRED)
 if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
   target_link_options_shared_lib(executorch)
 endif()

 # custom ops library
-add_subdirectory(custom_ops)
+if(EXECUTORCH_BUILD_CUSTOM)
+  add_subdirectory(custom_ops)
+endif()

 # llama_runner library
 add_subdirectory(runner)

-target_include_directories(llama_main PUBLIC
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include)
-target_include_directories(llama_main PUBLIC
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include)
+set(link_libraries)
+set(_srcs main.cpp)

 if(EXECUTORCH_BUILD_OPTIMIZED)
-  list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels
-       portable_kernels cpublas eigen_blas)
+  list(
+    APPEND
+    link_libraries
+    optimized_native_cpu_ops_lib
+    optimized_kernels
+    portable_kernels
+    cpublas
+    eigen_blas)
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
 else()
   list(APPEND link_libraries portable_ops_lib portable_kernels)
   target_link_options_shared_lib(portable_ops_lib)
 endif()

-target_link_libraries(llama_main PUBLIC gflags llama_runner custom_ops_lib)
+if(EXECUTORCH_BUILD_CUSTOM)
+  target_link_options_shared_lib(custom_ops_lib)
+  list(APPEND link_libraries custom_ops_lib)
+endif()
+
+# Extra compile option and include dir for pthreadpool
+if(EXECUTORCH_BUILD_PTHREADPOOL)
+  list(APPEND _common_compile_options -DET_USE_THREADPOOL)
+  list(APPEND link_libraries pthreadpool)
+  list(
+    APPEND
+    _srcs
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/threadpool.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/threadpool_guard.cpp
+  )
+  list(APPEND _common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include)
+endif()
+
+# Extra sources for cpuinfo
+if(EXECUTORCH_BUILD_CPUINFO)
+  list(APPEND link_libraries cpuinfo)
+  list(
+    APPEND
+    _srcs
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp
+  )
+  list(
+    APPEND
+    _common_include_directories
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include
+  )
+endif()

-# XNNPACK pthreadpool cpuinfo
+# XNNPACK
 if(TARGET xnnpack_backend)
-  set(xnnpack_backend_libs xnnpack_backend XNNPACK pthreadpool cpuinfo)
+  set(xnnpack_backend_libs xnnpack_backend XNNPACK)
   list(APPEND link_libraries ${xnnpack_backend_libs})
   target_link_options_shared_lib(xnnpack_backend)
 endif()

@@ -114,15 +158,19 @@ if(TARGET qnn_executorch_backend)
   target_link_options_shared_lib(qnn_executorch_backend)
 endif()

-# This one is needed for cpuinfo where it uses android
-# specific log lib
+# This one is needed for cpuinfo where it uses android specific log lib
 if(ANDROID)
   list(APPEND link_libraries log)
 endif()

-target_compile_options(llama_main PUBLIC ${_common_compile_options}
-                       -DET_USE_THREADPOOL)
-target_link_libraries(llama_main PUBLIC ${link_libraries})
+add_executable(llama_main ${_srcs})
+if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
+  target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
+endif()
+
+target_include_directories(llama_main PUBLIC ${_common_include_directories})
+target_link_libraries(llama_main PUBLIC gflags llama_runner ${link_libraries})
+target_compile_options(llama_main PUBLIC ${_common_compile_options})

 if(APPLE)
   target_link_options_shared_lib(executorch)

examples/models/llama2/runner/CMakeLists.txt (3 changes: 1 addition & 2 deletions)

@@ -47,8 +47,7 @@ else()
   add_library(llama_runner SHARED ${_llama_runner__srcs})
 endif()

-set(llama_runner_deps executorch extension_module extension_data_loader
-    custom_ops)
+set(llama_runner_deps executorch extension_module extension_data_loader)

 target_link_libraries(
   llama_runner PUBLIC ${llama_runner_deps})

extension/aten_util/make_aten_functor_from_et_functor.h (3 changes: 1 addition & 2 deletions)

@@ -149,8 +149,7 @@ struct type_convert<
   }
   c10::ScalarType scalar_type =
       static_cast<c10::ScalarType>(val.scalar_type());
-  converted =
-      at::from_blob(val.mutable_data_ptr(), val.numel(), sizes, scalar_type);
+  converted = at::from_blob(val.mutable_data_ptr(), sizes, scalar_type);
 }
 ATensor call() {
   return converted;
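
Context for this fix: at::IntArrayRef is implicitly constructible from a single int64_t, so the old four-argument call most likely resolved to ATen's (data, sizes, strides, options) overload of at::from_blob, producing a 1-D tensor of shape {numel} with `sizes` misread as strides. The three-argument form lets ATen derive the element count from `sizes` itself. Below is a minimal standalone sketch of the fixed call pattern, assuming ATen's public from_blob API; the wrap_buffer helper is hypothetical, not ExecuTorch code.

#include <ATen/ATen.h>

#include <vector>

// Zero-copy view over caller-owned memory: `data` must stay alive for the
// lifetime of the returned tensor, since from_blob does not take ownership.
at::Tensor wrap_buffer(float* data, std::vector<int64_t> sizes) {
  // (data, sizes, options): ATen computes numel from `sizes`, so passing a
  // separate numel argument is redundant and steers overload resolution
  // toward the (data, sizes, strides, options) form.
  return at::from_blob(data, sizes, at::kFloat);
}

int main() {
  std::vector<float> buf(6, 1.0f);
  at::Tensor t = wrap_buffer(buf.data(), {2, 3});
  // t.sizes() == [2, 3] and t.numel() == 6, as intended.
  return 0;
}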

extension/aten_util/targets.bzl (1 change: 1 addition & 0 deletions)

@@ -27,6 +27,7 @@ def define_common_targets():
         ],
         exported_deps = [
             "//executorch/extension/kernel_util:kernel_util",
+            "//executorch/extension/runner_util:managed_tensor",
             "//executorch/runtime/core:core",
             "//executorch/runtime/core:evalue",
             "//executorch/runtime/core/exec_aten:lib",

extension/kernel_util/meta_programming.h (2 changes: 1 addition & 1 deletion)

@@ -49,7 +49,7 @@ struct is_compile_time_function_pointer<
     CompileTimeFunctionPointer<FuncType, func_ptr>> : std::true_type {};

 #define EXECUTORCH_FN_TYPE(func) \
-  CompileTimeFunctionPointer< \
+  ::torch::executor::CompileTimeFunctionPointer< \
       std::remove_pointer_t<std::remove_reference_t<decltype(func)>>, \
       func>
 #define EXECUTORCH_FN(func) EXECUTORCH_FN_TYPE(func)()