Skip to content

Commit 66c5fc8

Browse files
lucylq authored and facebook-github-bot committed
Add xnnpack to llama runner mac & linux CI job (#2677)
Summary: Pull Request resolved: #2677 Remake of D55290786, which is ghstack poisoned Reviewed By: kimishpatel, digantdesai Differential Revision: D55349949 fbshipit-source-id: b840dfee42b7d1ad349d766e3956c1ab8687034d
1 parent d64fc74 commit 66c5fc8

File tree

8 files changed

+103
-25
lines changed

8 files changed

+103
-25
lines changed

.ci/scripts/setup-macos.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ print_cmake_info() {
104104
codesign -f -s - "${CMAKE_EXEC}" || true
105105
}
106106

107+
setup_macos_env_variables() {
108+
CMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')
109+
export CMAKE_PREFIX_PATH
110+
}
111+
112+
setup_macos_env_variables
107113
# NB: we need buck2 in all cases because cmake build also depends on calling
108114
# buck2 atm
109115
install_buck

.ci/scripts/test_llama.sh

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1212
MODEL_NAME=$1 # stories110M.pt
1313
BUILD_TOOL=$2 # buck2 or cmake
1414
DTYPE=$3 # fp16 or fp32
15-
15+
MODE=${4:-"xnnpack"} # portable or xnnpack
16+
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
17+
echo "Expecting atleast 4 positional arguments"
18+
echo "Usage: [...]"
19+
fi
1620
if [[ -z "${MODEL_NAME:-}" ]]; then
1721
echo "Missing model name, exiting..."
1822
exit 1
@@ -28,6 +32,11 @@ if [[ -z "${DTYPE:-}" ]]; then
2832
exit 1
2933
fi
3034

35+
if [[ -z "${MODE:-}" ]]; then
36+
echo "Missing mode, choose portable or xnnpack, exiting..."
37+
exit 1
38+
fi
39+
3140
if [[ -z "${BUCK:-}" ]]; then
3241
BUCK=buck2
3342
fi
@@ -42,12 +51,18 @@ which "${PYTHON_EXECUTABLE}"
4251
cmake_install_executorch_libraries() {
4352
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
4453
rm -rf cmake-out
54+
if [[ "${MODE}" == "xnnpack" ]]; then
55+
XNNPACK=ON
56+
else
57+
XNNPACK=OFF
58+
fi
4559
retry cmake -DBUCK2="$BUCK" \
4660
-DCMAKE_INSTALL_PREFIX=cmake-out \
4761
-DCMAKE_BUILD_TYPE=Release \
4862
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
4963
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
5064
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
65+
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
5166
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
5267
-Bcmake-out .
5368
cmake --build cmake-out -j9 --target install --config Release
@@ -101,7 +116,11 @@ fi
101116
# Export model.
102117
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
103118
echo "Exporting ${EXPORTED_MODEL_NAME}"
104-
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"
119+
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
120+
if [[ "${MODE}" == "xnnpack" ]]; then
121+
EXPORT_ARGS="${EXPORT_ARGS} --pt2e_quantize xnnpack_dynamic"
122+
fi
123+
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
105124

106125
# Create tokenizer.bin.
107126
echo "Creating tokenizer.bin"

.github/workflows/pull.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ jobs:
9090
matrix:
9191
dtype: [fp32]
9292
build-tool: [buck2, cmake]
93+
mode: [portable, xnnpack]
9394
fail-fast: false
9495
with:
9596
runner: linux.2xlarge
@@ -104,13 +105,14 @@ jobs:
104105
105106
DTYPE=${{ matrix.dtype }}
106107
BUILD_TOOL=${{ matrix.build-tool }}
108+
MODE=${{ matrix.mode }}
107109
108110
# Setup executorch
109111
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
110112
# Install requirements for export_llama
111113
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
112114
# Test llama2
113-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}"
115+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
114116
115117
test-custom-ops-linux:
116118
name: test-custom-ops-linux

.github/workflows/trunk.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,3 +215,63 @@ jobs:
215215
# Build and test coreml delegate
216216
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
217217
popd
218+
219+
test-pybind-build-macos:
220+
name: test-pybind-build-macos
221+
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
222+
strategy:
223+
matrix:
224+
include:
225+
- build-tool: cmake
226+
fail-fast: false
227+
with:
228+
runner: macos-m1-stable
229+
python-version: '3.11'
230+
submodules: 'true'
231+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
232+
timeout: 180
233+
script: |
234+
WORKSPACE=$(pwd)
235+
pushd "${WORKSPACE}/pytorch/executorch"
236+
bash .ci/scripts/setup-conda.sh
237+
238+
# build module for executorch.extension.pybindings.portable_lib
239+
BUILD_TOOL=${{ matrix.build-tool }}
240+
EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
241+
242+
# see if we can import the module successfully
243+
${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
244+
popd
245+
246+
test-llama-runner-macos:
247+
name: test-llama-runner-mac
248+
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
249+
strategy:
250+
matrix:
251+
dtype: [fp32]
252+
build-tool: [buck2, cmake]
253+
mode: [portable, xnnpack]
254+
fail-fast: false
255+
with:
256+
runner: macos-m1-stable
257+
python-version: '3.11'
258+
submodules: 'true'
259+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
260+
timeout: 900
261+
script: |
262+
WORKSPACE=$(pwd)
263+
pushd "${WORKSPACE}/pytorch/executorch"
264+
bash .ci/scripts/setup-conda.sh
265+
266+
DTYPE=${{ matrix.dtype }}
267+
BUILD_TOOL=${{ matrix.build-tool }}
268+
MODE=${{ matrix.mode }}
269+
270+
# Setup executorch
271+
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
272+
273+
# Install requirements for export_llama
274+
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
275+
# Test llama2
276+
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
277+
popd

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ if(EXECUTORCH_BUILD_PYBIND)
445445
# find pytorch lib, to allow pybind to take at::Tensor as input/output
446446
find_package(Torch CONFIG REQUIRED)
447447
find_library(TORCH_PYTHON_LIBRARY torch_python
448-
PATHS "${TORCH_INSTALL_PREFIX}/lib")
448+
PATHS "${TORCH_INSTALL_PREFIX}/lib")
449449

450450
# compile options for pybind
451451

examples/models/llama2/CMakeLists.txt

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -56,21 +56,15 @@ find_package(executorch CONFIG REQUIRED)
5656
# llama_runner library
5757
add_subdirectory(runner)
5858

59-
set(link_options)
6059
set(link_libraries)
6160

6261
if(EXECUTORCH_BUILD_OPTIMIZED)
63-
list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels portable_kernels)
64-
list(APPEND link_options
65-
"SHELL:LINKER:--whole-archive \
66-
$<TARGET_FILE:optimized_native_cpu_ops_lib> \
67-
LINKER:--no-whole-archive")
62+
list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels
63+
portable_kernels)
64+
target_link_options_shared_lib(optimized_native_cpu_ops_lib)
6865
else()
6966
list(APPEND link_libraries portable_ops_lib portable_kernels)
70-
list(APPEND link_options
71-
"SHELL:LINKER:--whole-archive \
72-
$<TARGET_FILE:portable_ops_lib> \
73-
LINKER:--no-whole-archive")
67+
target_link_options_shared_lib(portable_ops_lib)
7468
endif()
7569

7670
target_link_libraries(llama_main PUBLIC gflags llama_runner)
@@ -79,24 +73,21 @@ target_link_libraries(llama_main PUBLIC gflags llama_runner)
7973
if(TARGET xnnpack_backend)
8074
set(xnnpack_backend_libs xnnpack_backend XNNPACK pthreadpool cpuinfo)
8175
list(APPEND link_libraries ${xnnpack_backend_libs})
82-
list(APPEND link_options
83-
"SHELL:LINKER:--whole-archive \
84-
$<TARGET_FILE:xnnpack_backend> \
85-
LINKER:--no-whole-archive")
76+
target_link_options_shared_lib(xnnpack_backend)
8677
endif()
8778

8879
# Vulkan backend
8980
if(TARGET vulkan_backend)
9081
list(APPEND link_libraries vulkan_backend)
91-
list(APPEND link_options
92-
"SHELL:LINKER:--whole-archive \
93-
$<TARGET_FILE:vulkan_backend> \
94-
LINKER:--no-whole-archive")
82+
target_link_options_shared_lib(vulkan_backend)
9583
endif()
9684

9785
target_compile_options(llama_main PUBLIC ${_common_compile_options})
9886
target_link_libraries(llama_main PUBLIC ${link_libraries})
99-
target_link_options(llama_main PUBLIC ${link_options})
87+
88+
if(APPLE)
89+
target_link_options_shared_lib(executorch)
90+
endif()
10091

10192
# Print all summary
10293
executorch_print_configuration_summary()

examples/models/llama2/runner/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ list(TRANSFORM _llama_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
3939
target_include_directories(extension_module
4040
INTERFACE ${_common_include_directories})
4141

42-
if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID)
42+
if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID OR APPLE)
4343
# Building a share library on iOS requires code signing
4444
# On Android we see duplicated registration when using shared lib
4545
add_library(llama_runner STATIC ${_llama_runner__srcs})

extension/module/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ if(NOT EXECUTORCH_ROOT)
1717
endif()
1818

1919
list(TRANSFORM _extension_module__srcs PREPEND "${EXECUTORCH_ROOT}/")
20-
if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID)
20+
if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID OR APPLE)
2121
# Building a share library on iOS requires code signing
2222
# On Android we see duplicated registration when using shared lib
2323
add_library(extension_module STATIC ${_extension_module__srcs})

0 commit comments

Comments (0)