Revert "Use new API to register custom ops for llama model (#2840)" #2912

Closed · wants to merge 1 commit

41 changes: 11 additions & 30 deletions .ci/scripts/test_llama.sh
@@ -37,18 +37,6 @@ if [[ -z "${MODE:-}" ]]; then
exit 1
fi

if [[ "${MODE}" =~ xnnpack.* ]]; then
XNNPACK=ON
else
XNNPACK=OFF
fi

if [[ "${MODE}" =~ .*custom.* ]]; then
CUSTOM=ON
else
CUSTOM=OFF
fi

if [[ -z "${BUCK:-}" ]]; then
BUCK=buck2
fi
@@ -59,39 +47,38 @@ fi

which "${PYTHON_EXECUTABLE}"

CMAKE_PREFIX_PATH=$($PYTHON_EXECUTABLE -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
if [[ "${MODE}" == "xnnpack" ]]; then
XNNPACK=ON
else
XNNPACK=OFF
fi
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Debug
cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
echo "Building llama runner"
dir="examples/models/llama2"
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DCMAKE_BUILD_TYPE=Debug \
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Debug
cmake --build cmake-out/${dir} -j9 --config Release

}
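
A side note on the Debug/Release pairs above: with a single-config generator (the default Unix Makefiles), the build type is fixed by `CMAKE_BUILD_TYPE` at configure time, while `--config` is only consulted by multi-config generators such as Xcode or Visual Studio. Passing both, as this script does, keeps the two cases consistent. A minimal sketch of the pattern:

```bash
# Single-config generators bake the build type in at configure time;
# --config matters only for multi-config generators (Xcode, VS).
cmake -DCMAKE_BUILD_TYPE=Release -Bcmake-out .
cmake --build cmake-out -j9 --target install --config Release
```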

@@ -126,20 +113,13 @@ else
exit 1
fi

# Install custom ops before exporting
echo "Installing executorch libraries"
cmake_install_executorch_libraries

# Export model.
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
if [[ "${MODE}" == "xnnpack" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
fi
# Add dynamically linked library location
export LD_LIBRARY_PATH=${PWD}/cmake-out/lib
export DYLD_LIBRARY_PATH=${PWD}/cmake-out/lib
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}

# Create tokenizer.bin.
@@ -155,6 +135,7 @@ if [[ "${BUILD_TOOL}" == "buck2" ]]; then
# shellcheck source=/dev/null
$BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
cmake_install_executorch_libraries
cmake_build_llama_runner
# Run llama runner
NOW=$(date +"%H:%M:%S")
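
Net effect on this script: mode detection returns to exact string comparison, so composite mode strings stop matching. A minimal sketch of the difference between the removed regex checks and the restored exact check (the `MODE` value is illustrative):

```bash
#!/usr/bin/env bash
MODE="xnnpack+kv+custom"  # composite mode used before this revert

# Removed behavior: regex match, so any mode containing "xnnpack" qualifies.
[[ "${MODE}" =~ xnnpack.* ]] && echo "regex match: XNNPACK=ON"

# Restored behavior: exact match, so only the literal mode "xnnpack" qualifies.
[[ "${MODE}" == "xnnpack" ]] && echo "exact match: XNNPACK=ON"
```

Run as-is, only the regex line prints — which is why the CI matrices below also move from `xnnpack+kv+custom` back to `xnnpack`.
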
2 changes: 1 addition & 1 deletion .github/workflows/pull.yml
@@ -90,7 +90,7 @@ jobs:
matrix:
dtype: [fp32]
build-tool: [buck2, cmake]
mode: [portable, xnnpack+kv+custom]
mode: [portable, xnnpack]
fail-fast: false
with:
runner: linux.2xlarge
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -254,7 +254,7 @@ jobs:
matrix:
dtype: [fp32]
build-tool: [buck2, cmake]
mode: [portable, xnnpack+kv+custom]
mode: [portable, xnnpack]
fail-fast: false
with:
runner: macos-m1-stable
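
Both workflow matrices swap `xnnpack+kv+custom` for `xnnpack` so the mode strings line up with what the reverted `test_llama.sh` accepts. How a matrix entry reaches the script is not shown in this diff; a hedged sketch under the assumption that the workflow exports matrix values as the environment variables the script reads:

```bash
# Assumption: the workflow forwards matrix values as environment variables
# (MODE, DTYPE, and BUILD_TOOL are the names the script checks).
export MODE=xnnpack DTYPE=fp32 BUILD_TOOL=cmake
bash .ci/scripts/test_llama.sh
```
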
62 changes: 25 additions & 37 deletions CMakeLists.txt
@@ -175,9 +175,8 @@ option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#
cmake_dependent_option(
EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
ON "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)

#
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
@@ -187,9 +186,6 @@ cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON

if(EXECUTORCH_BUILD_CPUINFO)
# --- cpuinfo
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
set(CPUINFO_BUILD_TOOLS
OFF
@@ -211,15 +207,10 @@
CACHE STRING "")
set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
add_subdirectory("${CPUINFO_SOURCE_DIR}")
set(CMAKE_POSITION_INDEPENDENT_CODE
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
endif()

if(EXECUTORCH_BUILD_PTHREADPOOL)
# --- pthreadpool
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
set(PTHREADPOOL_BUILD_TESTS
OFF
@@ -239,8 +230,6 @@
CACHE STRING "")
endif()
add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
set(CMAKE_POSITION_INDEPENDENT_CODE
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
endif()

if(NOT PYTHON_EXECUTABLE)
@@ -515,38 +504,25 @@ if(EXECUTORCH_BUILD_PYBIND)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
endif()

# find pytorch lib, to allow pybind to take at::Tensor as input/output
find_package(Torch CONFIG REQUIRED)
find_library(TORCH_PYTHON_LIBRARY torch_python
PATHS "${TORCH_INSTALL_PREFIX}/lib")

set(_dep_libs
${TORCH_PYTHON_LIBRARY}
bundled_program
etdump
executorch
extension_data_loader
portable_ops_lib
util
torch)

if(EXECUTORCH_BUILD_COREML)
list(APPEND _dep_libs coremldelegate)
set(PYBIND_LINK_COREML "coremldelegate")
endif()

if(EXECUTORCH_BUILD_MPS)
list(APPEND _dep_libs mpsdelegate)
set(PYBIND_LINK_MPS "mpsdelegate")
endif()

if(EXECUTORCH_BUILD_XNNPACK)
# need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
# from libtorch_cpu
list(APPEND _dep_libs xnnpack_backend XNNPACK)
# need to explicitly specify XNNPACK here
# otherwise uses XNNPACK symbols from libtorch_cpu
set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
endif()

if(EXECUTORCH_BUILD_CUSTOM)
list(APPEND _dep_libs custom_ops custom_ops_aot_lib)
endif()
# find pytorch lib, to allow pybind to take at::Tensor as input/output
find_package(Torch CONFIG REQUIRED)
find_library(TORCH_PYTHON_LIBRARY torch_python
PATHS "${TORCH_INSTALL_PREFIX}/lib")

# compile options for pybind

set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
@@ -568,7 +544,19 @@
PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
target_link_libraries(portable_lib PUBLIC ${_dep_libs})
target_link_libraries(
portable_lib
PUBLIC ${TORCH_PYTHON_LIBRARY}
bundled_program
etdump
executorch
extension_data_loader
portable_ops_lib
util
torch
${PYBIND_LINK_COREML}
${PYBIND_LINK_MPS}
${PYBIND_LINK_XNNPACK})

install(TARGETS portable_lib
LIBRARY DESTINATION executorch/extension/pybindings)
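
The pybind linking change swaps the `_dep_libs` list introduced by #2840 for per-backend `PYBIND_LINK_*` variables. The idiom works because an unset CMake variable expands to nothing, so disabled backends simply contribute no arguments to `target_link_libraries`. An illustrative sketch (target and library names are hypothetical):

```cmake
# Hypothetical example of the optional-link-variable idiom used above:
# when ENABLE_EXTRA is off, ${EXTRA_LINK_LIB} expands to nothing and the
# link line is just "core_lib".
if(ENABLE_EXTRA)
  set(EXTRA_LINK_LIB extra_backend)
endif()
target_link_libraries(my_target PUBLIC core_lib ${EXTRA_LINK_LIB})
```
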
66 changes: 23 additions & 43 deletions examples/models/llama2/CMakeLists.txt
@@ -49,72 +49,56 @@ set(_common_compile_options -Wno-deprecated-declarations -fPIC)
# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# For some reason android build is not able to find where gflags is and hence
# cannot find corresponding .cmake file
# For some reason android build is not able to find where gflags is
# and hence cannot find corresponding .cmake file
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

#
# llama_main: test binary to run llama, with tokenizer and sampler integrated
#
add_executable(llama_main main.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp)
if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
endif()

# find `executorch` libraries Same as for gflags
# find `executorch` libraries
# Same as for gflags
set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
find_package(executorch CONFIG REQUIRED)
if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
target_link_options_shared_lib(executorch)
endif()

# custom ops library
if(EXECUTORCH_BUILD_CUSTOM)
add_subdirectory(custom_ops)
endif()
add_subdirectory(custom_ops)

# llama_runner library
add_subdirectory(runner)

target_include_directories(llama_main PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include)
target_include_directories(llama_main PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include)

set(link_libraries)
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_OPTIMIZED)
list(
APPEND
link_libraries
optimized_native_cpu_ops_lib
optimized_kernels
portable_kernels
cpublas
eigen_blas)
list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels
portable_kernels cpublas eigen_blas)
target_link_options_shared_lib(optimized_native_cpu_ops_lib)
else()
list(APPEND link_libraries portable_ops_lib portable_kernels)
target_link_options_shared_lib(portable_ops_lib)
endif()

if(EXECUTORCH_BUILD_CUSTOM)
target_link_options_shared_lib(custom_ops)
list(APPEND link_libraries custom_ops)
endif()
target_link_libraries(llama_main PUBLIC gflags llama_runner custom_ops_lib)

# XNNPACK pthreadpool cpuinfo
if(TARGET xnnpack_backend)
set(xnnpack_backend_libs xnnpack_backend XNNPACK pthreadpool cpuinfo)
list(APPEND link_libraries ${xnnpack_backend_libs})
# HACK: main only includes these when the xnnpack backend is available, so
# that we have all the threadpool sources under xnnpack.
list(APPEND _common_compile_options -DET_USE_THREADPOOL)
list(
APPEND
_srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp
)
list(
APPEND
_common_include_directories
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include
)
# end of hack
target_link_options_shared_lib(xnnpack_backend)
endif()

@@ -130,19 +114,15 @@ if(TARGET qnn_executorch_backend)
target_link_options_shared_lib(qnn_executorch_backend)
endif()

# This one is needed for cpuinfo where it uses android specific log lib
# This one is needed for cpuinfo where it uses android
# specific log lib
if(ANDROID)
list(APPEND link_libraries log)
endif()

add_executable(llama_main ${_srcs})
if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
endif()

target_include_directories(llama_main PUBLIC ${_common_include_directories})
target_link_libraries(llama_main PUBLIC gflags llama_runner ${link_libraries})
target_compile_options(llama_main PUBLIC ${_common_compile_options})
target_compile_options(llama_main PUBLIC ${_common_compile_options}
-DET_USE_THREADPOOL)
target_link_libraries(llama_main PUBLIC ${link_libraries})

if(APPLE)
target_link_options_shared_lib(executorch)
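
`target_link_options_shared_lib` appears throughout this file; it is an ExecuTorch build helper whose apparent job is to stop the linker from discarding static initializers that register kernels and backends (ops registered this way are never referenced directly, so they would otherwise be garbage-collected out of static archives). A rough sketch of the underlying idea for GNU ld — an assumption about what the helper arranges, not its actual implementation:

```cmake
# Assumed GNU-ld equivalent of target_link_options_shared_lib(custom_ops_lib):
# force every object in the archive into the final binary so its
# registration-by-static-initializer code runs at startup.
target_link_options(llama_main PRIVATE
  "LINKER:SHELL:--whole-archive $<TARGET_FILE:custom_ops_lib> --no-whole-archive")
```
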
3 changes: 1 addition & 2 deletions examples/models/llama2/TARGETS
@@ -18,7 +18,7 @@ runtime.python_library(
],
deps = [
"//caffe2:torch",
"//executorch/examples/models/llama2/custom_ops:custom_ops_aot_py",
"//executorch/examples/models/llama2/custom_ops:llama_custom_ops_aot_lib",
],
)

@@ -52,7 +52,6 @@ runtime.python_binary(
main_module = "executorch.examples.models.llama2.export_llama",
# visibility = ["//executorch/examples/..."],
preload_deps = [
"//executorch/examples/models/llama2/custom_ops:custom_ops_aot_lib",
"//executorch/kernels/quantized:aot_lib",
],
deps = [