Revert "Use new API to register custom ops for llama model (#2840)" #2912

Closed · wants to merge 1 commit

41 changes: 11 additions & 30 deletions .ci/scripts/test_llama.sh
@@ -37,18 +37,6 @@ if [[ -z "${MODE:-}" ]]; then
exit 1
fi

if [[ "${MODE}" =~ xnnpack.* ]]; then
XNNPACK=ON
else
XNNPACK=OFF
fi

if [[ "${MODE}" =~ .*custom.* ]]; then
CUSTOM=ON
else
CUSTOM=OFF
fi

if [[ -z "${BUCK:-}" ]]; then
BUCK=buck2
fi
@@ -59,39 +47,38 @@ fi

which "${PYTHON_EXECUTABLE}"

CMAKE_PREFIX_PATH=$($PYTHON_EXECUTABLE -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
if [[ "${MODE}" == "xnnpack" ]]; then
XNNPACK=ON
else
XNNPACK=OFF
fi
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Debug
cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
echo "Building llama runner"
dir="examples/models/llama2"
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DCMAKE_BUILD_TYPE=Debug \
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Debug
cmake --build cmake-out/${dir} -j9 --config Release

}
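
A side note on the Debug/Release pairs above: with a single-config generator (the default Unix Makefiles), the build type is fixed by `CMAKE_BUILD_TYPE` at configure time, while `--config` is only consulted by multi-config generators such as Xcode or Visual Studio. Passing both, as this script does, keeps the two cases consistent. A minimal sketch of the pattern:

```bash
# Single-config generators bake the build type in at configure time;
# --config matters only for multi-config generators (Xcode, VS).
cmake -DCMAKE_BUILD_TYPE=Release -Bcmake-out .
cmake --build cmake-out -j9 --target install --config Release
```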

@@ -126,20 +113,13 @@ else
exit 1
fi

# Install custom ops before exporting
echo "Installing executorch libraries"
cmake_install_executorch_libraries

# Export model.
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
if [[ "${MODE}" == "xnnpack" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
fi
# Add dynamically linked library location
export LD_LIBRARY_PATH=${PWD}/cmake-out/lib
export DYLD_LIBRARY_PATH=${PWD}/cmake-out/lib
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}

# Create tokenizer.bin.
@@ -155,6 +135,7 @@ if [[ "${BUILD_TOOL}" == "buck2" ]]; then
# shellcheck source=/dev/null
$BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
cmake_install_executorch_libraries
cmake_build_llama_runner
# Run llama runner
NOW=$(date +"%H:%M:%S")
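
Net effect on this script: mode detection returns to exact string comparison, so composite mode strings stop matching. A minimal sketch of the difference between the removed regex checks and the restored exact check (the `MODE` value is illustrative):

```bash
#!/usr/bin/env bash
MODE="xnnpack+kv+custom"  # composite mode used before this revert

# Removed behavior: regex match, so any mode containing "xnnpack" qualifies.
[[ "${MODE}" =~ xnnpack.* ]] && echo "regex match: XNNPACK=ON"

# Restored behavior: exact match, so only the literal mode "xnnpack" qualifies.
[[ "${MODE}" == "xnnpack" ]] && echo "exact match: XNNPACK=ON"
```

Run as-is, only the regex line prints — which is why the CI matrices below also move from `xnnpack+kv+custom` back to `xnnpack`.
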
2 changes: 1 addition & 1 deletion .github/workflows/pull.yml
@@ -90,7 +90,7 @@ jobs:
matrix:
dtype: [fp32]
build-tool: [buck2, cmake]
mode: [portable, xnnpack+kv+custom]
mode: [portable, xnnpack]
fail-fast: false
with:
runner: linux.2xlarge
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -254,7 +254,7 @@ jobs:
matrix:
dtype: [fp32]
build-tool: [buck2, cmake]
mode: [portable, xnnpack+kv+custom]
mode: [portable, xnnpack]
fail-fast: false
with:
runner: macos-m1-stable
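
Both workflow matrices swap `xnnpack+kv+custom` for `xnnpack` so the mode strings line up with what the reverted `test_llama.sh` accepts. How a matrix entry reaches the script is not shown in this diff; a hedged sketch under the assumption that the workflow exports matrix values as the environment variables the script reads:

```bash
# Assumption: the workflow forwards matrix values as environment variables
# (MODE, DTYPE, and BUILD_TOOL are the names the script checks).
export MODE=xnnpack DTYPE=fp32 BUILD_TOOL=cmake
bash .ci/scripts/test_llama.sh
```
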
62 changes: 25 additions & 37 deletions CMakeLists.txt
@@ -175,9 +175,8 @@ option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#
cmake_dependent_option(
EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
ON "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)

#
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
@@ -187,9 +186,6 @@ cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON

if(EXECUTORCH_BUILD_CPUINFO)
# --- cpuinfo
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
set(CPUINFO_BUILD_TOOLS
OFF
@@ -211,15 +207,10 @@
CACHE STRING "")
set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
add_subdirectory("${CPUINFO_SOURCE_DIR}")
set(CMAKE_POSITION_INDEPENDENT_CODE
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
endif()

if(EXECUTORCH_BUILD_PTHREADPOOL)
# --- pthreadpool
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
set(PTHREADPOOL_BUILD_TESTS
OFF
@@ -239,8 +230,6 @@
CACHE STRING "")
endif()
add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
set(CMAKE_POSITION_INDEPENDENT_CODE
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
endif()

if(NOT PYTHON_EXECUTABLE)
@@ -515,38 +504,25 @@ if(EXECUTORCH_BUILD_PYBIND)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
endif()

# find pytorch lib, to allow pybind to take at::Tensor as input/output
find_package(Torch CONFIG REQUIRED)
find_library(TORCH_PYTHON_LIBRARY torch_python
PATHS "${TORCH_INSTALL_PREFIX}/lib")

set(_dep_libs
${TORCH_PYTHON_LIBRARY}
bundled_program
etdump
executorch
extension_data_loader
portable_ops_lib
util
torch)

if(EXECUTORCH_BUILD_COREML)
list(APPEND _dep_libs coremldelegate)
set(PYBIND_LINK_COREML "coremldelegate")
endif()

if(EXECUTORCH_BUILD_MPS)
list(APPEND _dep_libs mpsdelegate)
set(PYBIND_LINK_MPS "mpsdelegate")
endif()

if(EXECUTORCH_BUILD_XNNPACK)
# need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
# from libtorch_cpu
list(APPEND _dep_libs xnnpack_backend XNNPACK)
# need to explicitly specify XNNPACK here
# otherwise uses XNNPACK symbols from libtorch_cpu
set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
endif()

if(EXECUTORCH_BUILD_CUSTOM)
list(APPEND _dep_libs custom_ops custom_ops_aot_lib)
endif()
# find pytorch lib, to allow pybind to take at::Tensor as input/output
find_package(Torch CONFIG REQUIRED)
find_library(TORCH_PYTHON_LIBRARY torch_python
PATHS "${TORCH_INSTALL_PREFIX}/lib")

# compile options for pybind

set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
@@ -568,7 +544,19 @@
PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
target_link_libraries(portable_lib PUBLIC ${_dep_libs})
target_link_libraries(
portable_lib
PUBLIC ${TORCH_PYTHON_LIBRARY}
bundled_program
etdump
executorch
extension_data_loader
portable_ops_lib
util
torch
${PYBIND_LINK_COREML}
${PYBIND_LINK_MPS}
${PYBIND_LINK_XNNPACK})

install(TARGETS portable_lib
LIBRARY DESTINATION executorch/extension/pybindings)
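
The pybind linking change swaps the `_dep_libs` list introduced by #2840 for per-backend `PYBIND_LINK_*` variables. The idiom works because an unset CMake variable expands to nothing, so disabled backends simply contribute no arguments to `target_link_libraries`. An illustrative sketch (target and library names are hypothetical):

```cmake
# Hypothetical example of the optional-link-variable idiom used above:
# when ENABLE_EXTRA is off, ${EXTRA_LINK_LIB} expands to nothing and the
# link line is just "core_lib".
if(ENABLE_EXTRA)
  set(EXTRA_LINK_LIB extra_backend)
endif()
target_link_libraries(my_target PUBLIC core_lib ${EXTRA_LINK_LIB})
```
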
66 changes: 23 additions & 43 deletions examples/models/llama2/CMakeLists.txt
@@ -49,72 +49,56 @@ set(_common_compile_options -Wno-deprecated-declarations -fPIC)
# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# For some reason android build is not able to find where gflags is and hence
# cannot find corresponding .cmake file
# For some reason android build is not able to find where gflags is
# and hence cannot find corresponding .cmake file
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

#
# llama_main: test binary to run llama, with tokenizer and sampler integrated
#
add_executable(llama_main main.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp)
if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
endif()

# find `executorch` libraries Same as for gflags
# find `executorch` libraries
# Same as for gflags
set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
find_package(executorch CONFIG REQUIRED)
if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
target_link_options_shared_lib(executorch)
endif()

# custom ops library
if(EXECUTORCH_BUILD_CUSTOM)
add_subdirectory(custom_ops)
endif()
add_subdirectory(custom_ops)

# llama_runner library
add_subdirectory(runner)

target_include_directories(llama_main PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include)
target_include_directories(llama_main PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include)

set(link_libraries)
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_OPTIMIZED)
list(
APPEND
link_libraries
optimized_native_cpu_ops_lib
optimized_kernels
portable_kernels
cpublas
eigen_blas)
list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels
portable_kernels cpublas eigen_blas)
target_link_options_shared_lib(optimized_native_cpu_ops_lib)
else()
list(APPEND link_libraries portable_ops_lib portable_kernels)
target_link_options_shared_lib(portable_ops_lib)
endif()

if(EXECUTORCH_BUILD_CUSTOM)
target_link_options_shared_lib(custom_ops)
list(APPEND link_libraries custom_ops)
endif()
target_link_libraries(llama_main PUBLIC gflags llama_runner custom_ops_lib)

# XNNPACK pthreadpool cpuinfo
if(TARGET xnnpack_backend)
set(xnnpack_backend_libs xnnpack_backend XNNPACK pthreadpool cpuinfo)
list(APPEND link_libraries ${xnnpack_backend_libs})
# HACK: main only includes these when the xnnpack backend is available, so
# that we have all the threadpool sources under xnnpack.
list(APPEND _common_compile_options -DET_USE_THREADPOOL)
list(
APPEND
_srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp
)
list(
APPEND
_common_include_directories
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include
)
# end of hack
target_link_options_shared_lib(xnnpack_backend)
endif()

@@ -130,19 +114,15 @@ if(TARGET qnn_executorch_backend)
target_link_options_shared_lib(qnn_executorch_backend)
endif()

# This one is needed for cpuinfo where it uses android specific log lib
# This one is needed for cpuinfo where it uses android
# specific log lib
if(ANDROID)
list(APPEND link_libraries log)
endif()

add_executable(llama_main ${_srcs})
if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
endif()

target_include_directories(llama_main PUBLIC ${_common_include_directories})
target_link_libraries(llama_main PUBLIC gflags llama_runner ${link_libraries})
target_compile_options(llama_main PUBLIC ${_common_compile_options})
target_compile_options(llama_main PUBLIC ${_common_compile_options}
-DET_USE_THREADPOOL)
target_link_libraries(llama_main PUBLIC ${link_libraries})

if(APPLE)
target_link_options_shared_lib(executorch)
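
`target_link_options_shared_lib` appears throughout this file; it is an ExecuTorch build helper whose apparent job is to stop the linker from discarding static initializers that register kernels and backends (ops registered this way are never referenced directly, so they would otherwise be garbage-collected out of static archives). A rough sketch of the underlying idea for GNU ld — an assumption about what the helper arranges, not its actual implementation:

```cmake
# Assumed GNU-ld equivalent of target_link_options_shared_lib(custom_ops_lib):
# force every object in the archive into the final binary so its
# registration-by-static-initializer code runs at startup.
target_link_options(llama_main PRIVATE
  "LINKER:SHELL:--whole-archive $<TARGET_FILE:custom_ops_lib> --no-whole-archive")
```
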
3 changes: 1 addition & 2 deletions examples/models/llama2/TARGETS
@@ -18,7 +18,7 @@ runtime.python_library(
],
deps = [
"//caffe2:torch",
"//executorch/examples/models/llama2/custom_ops:custom_ops_aot_py",
"//executorch/examples/models/llama2/custom_ops:llama_custom_ops_aot_lib",
],
)

@@ -52,7 +52,6 @@ runtime.python_binary(
main_module = "executorch.examples.models.llama2.export_llama",
# visibility = ["//executorch/examples/..."],
preload_deps = [
"//executorch/examples/models/llama2/custom_ops:custom_ops_aot_lib",
"//executorch/kernels/quantized:aot_lib",
],
deps = [