Commit 5a45132

[llm] Fix llama/README.md (#11543)
This PR simplifies the `cmake` commands in the Llama README by using a preset: it streamlines the build process for the Llama model and its runner by introducing a preset configuration and removing redundant build flags, and it updates `extension/llm/runner/CMakeLists.txt` to enable specific features for tokenizers.

### Build process simplification

* [`examples/models/llama/README.md`](diffhunk://#diff-535f376de1f099ede770ee4d5b3c3193b5784c6a0342e292e667fe4ff9d1633eL272-L298): Replaced the detailed `cmake` commands with a preset configuration (`--preset llm`) for building executorch, and removed redundant flags from the Llama runner build. This streamlines the instructions and reduces complexity.

### Tokenizer configuration updates

* [`extension/llm/runner/CMakeLists.txt`](diffhunk://#diff-ab47c38904702e3d66a37419ca35a07815f7d4735f7e94330d17643b9f77ad2bR47-R48): Added `SUPPORT_REGEX_LOOKAHEAD` and `PCRE2_STATIC_PIC` settings to enable regex lookahead support and ensure position-independent code for tokenizers.
1 parent 6a4df94 commit 5a45132
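For context on the preset mechanism: `--preset llm` refers to a configure preset, which CMake reads from the project's `CMakePresets.json`. The presets available in a checkout (including the `llm` preset this PR relies on) can be listed with the stock CMake command below; this is a usage sketch assuming you run it from the root of an executorch checkout.

```shell
# From the root of an executorch checkout: list the configure presets
# defined in CMakePresets.json (requires CMake >= 3.20).
cmake --list-presets
```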

File tree

3 files changed: +11 −23 lines changed

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 2 deletions

```diff
@@ -156,8 +156,7 @@ cmake_install_executorch_libraries() {
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -DEXECUTORCH_BUILD_QNN="$QNN" \
-    -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
-    -Bcmake-out .
+    -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
   cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }
```

examples/models/llama/README.md

Lines changed: 3 additions & 19 deletions

````diff
@@ -269,33 +269,17 @@ You can export and run the original Llama 3 8B instruct model.
 
 1. Build executorch with optimized CPU performance as follows. Build options available [here](https://github.com/pytorch/executorch/blob/main/CMakeLists.txt#L59).
    ```
-   cmake -DPYTHON_EXECUTABLE=python \
-       -DCMAKE_INSTALL_PREFIX=cmake-out \
-       -DEXECUTORCH_ENABLE_LOGGING=1 \
-       -DCMAKE_BUILD_TYPE=Release \
-       -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-       -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-       -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-       -DEXECUTORCH_BUILD_XNNPACK=ON \
-       -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-       -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-       -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-       -Bcmake-out .
+   cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out
 
    cmake --build cmake-out -j16 --target install --config Release
    ```
    Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the section of Common Issues and Mitigations below for solutions.
 
 2. Build llama runner.
    ```
-   cmake -DPYTHON_EXECUTABLE=python \
-       -DCMAKE_INSTALL_PREFIX=cmake-out \
+   cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+       -DBUILD_TESTING=OFF \
        -DCMAKE_BUILD_TYPE=Release \
-       -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-       -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-       -DEXECUTORCH_BUILD_XNNPACK=ON \
-       -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-       -DSUPPORT_REGEX_LOOKAHEAD=ON
        -Bcmake-out/examples/models/llama \
        examples/models/llama
````
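Taken together, the two README steps after this change reduce to the sequence below. This is a sketch assuming an executorch checkout as the working directory; the final `cmake --build` line for the runner is an assumption (the README's follow-up build step falls outside this hunk).

```shell
# Step 1: configure and install executorch core libraries via the llm preset
cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out
cmake --build cmake-out -j16 --target install --config Release

# Step 2: configure the llama runner against the installed libraries
cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DBUILD_TESTING=OFF \
    -DCMAKE_BUILD_TYPE=Release \
    -Bcmake-out/examples/models/llama \
    examples/models/llama

# Assumed follow-up step (not shown in this hunk): build the runner binary
cmake --build cmake-out/examples/models/llama -j16 --config Release
```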

extension/llm/runner/CMakeLists.txt

Lines changed: 7 additions & 2 deletions

```diff
@@ -44,6 +44,10 @@ target_include_directories(
 add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})
 
 # add tokenizers
+set(SUPPORT_REGEX_LOOKAHEAD ON)
+# llama/runner/CMakeLists.txt builds a shared library libllama_runner.so that
+# transitively depends on tokenizers. Need to build tokenizers with -fPIC.
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 add_subdirectory(
   ${EXECUTORCH_ROOT}/extension/llm/tokenizers
   ${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/tokenizers
@@ -54,8 +58,9 @@ set(runner_deps executorch_core extension_module extension_tensor tokenizers)
 target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
 
 target_include_directories(
-  extension_llm_runner INTERFACE ${_common_include_directories}
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+  extension_llm_runner
+  INTERFACE ${_common_include_directories}
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 
 if(BUILD_TESTING)
```
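The `CMAKE_POSITION_INDEPENDENT_CODE` change addresses a general pattern: object code in a static library can only be linked into a shared library if it was compiled as position-independent code (`-fPIC` on GCC/Clang for ELF platforms). A minimal standalone sketch of the same situation, with hypothetical target and source names rather than the actual executorch targets:

```cmake
# Hypothetical minimal example, not the actual executorch build files.
cmake_minimum_required(VERSION 3.20)
project(pic_demo CXX)

# A static library that will later be folded into a shared library.
add_library(tokenizers STATIC tokenizer.cpp)
# Without PIC, linking this archive into libllama_runner.so typically fails
# with relocation errors on ELF platforms.
set_target_properties(tokenizers PROPERTIES POSITION_INDEPENDENT_CODE ON)

# The shared library that transitively pulls in the static archive.
add_library(llama_runner SHARED runner.cpp)
target_link_libraries(llama_runner PRIVATE tokenizers)
```

Setting the directory-wide variable `set(CMAKE_POSITION_INDEPENDENT_CODE ON)` before `add_subdirectory(...)`, as the diff does, initializes the `POSITION_INDEPENDENT_CODE` property for every target defined below it, which covers the tokenizers targets without editing their own CMake files.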
