Skip to content

Commit 48e60d7

Browse files
committed
Add quantized op support to llama runner
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: ghstack-source-id: 15fec06 Pull Request resolved: #3062
1 parent 25a5595 commit 48e60d7

File tree

5 files changed

+13
-4
lines changed

5 files changed

+13
-4
lines changed

.ci/scripts/test_quantized_aot_lib.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ build_cmake_quantized_aot_lib() {
2424
&& retry cmake -DBUCK2=buck2 \
2525
-DCMAKE_BUILD_TYPE=Release \
2626
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
27-
-DEXECUTORCH_BUILD_QUANTIZED=ON \
27+
-DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \
2828
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
2929

3030
cmake --build ${CMAKE_OUTPUT_DIR} -j4

build/executorch-config.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ set(lib_list
3838
etdump bundled_program extension_data_loader ${FLATCCRT_LIB} mpsdelegate
3939
qnn_executorch_backend portable_ops_lib extension_module xnnpack_backend
4040
XNNPACK cpuinfo pthreadpool vulkan_backend optimized_kernels cpublas eigen_blas
41-
optimized_ops_lib optimized_native_cpu_ops_lib
41+
optimized_ops_lib optimized_native_cpu_ops_lib quantized_kernels quantized_ops_lib
4242
)
4343
foreach(lib ${lib_list})
4444
# Name of the variable which stores result of the find_library search

examples/models/llama2/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ add_subdirectory(runner)
9191
if(EXECUTORCH_USE_TIKTOKEN)
9292
# find RE2 for tokenizer
9393
set(ABSL_ENABLE_INSTALL ON)
94+
set(ABSL_PROPAGATE_CXX_STD ON)
9495
set(_pic_flag
9596
${CMAKE_POSITION_INDEPENDENT_CODE})
9697
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -118,6 +119,11 @@ else()
118119
target_link_options_shared_lib(portable_ops_lib)
119120
endif()
120121

122+
if(EXECUTORCH_BUILD_QUANTIZED)
123+
list(APPEND link_libraries quantized_ops_lib quantized_kernels)
124+
target_link_options_shared_lib(quantized_ops_lib)
125+
endif()
126+
121127
if(EXECUTORCH_BUILD_CUSTOM)
122128
target_link_options_shared_lib(custom_ops)
123129
list(APPEND link_libraries custom_ops)

examples/models/llama2/quant_lib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def check_embedding_byte_registered():
105105
'Use `python -c "import torch as _; print(_.__path__)"` to find where torch package is installed.\n'
106106
"Set that as TORCH_PACKAGE_DIR.\n"
107107
"Then from root executorch dir do the following:\n"
108-
"rm -rf cmake-out && mkdir cmake-out && (cd cmake-out && cmake -DBUCK2=<path-to-buck2> -DCMAKE_PREFIX_PATH=$TORCH_PACKAGE_DIR -DEXECUTORCH_BUILD_QUANTIZED=ON ..) && cmake --build . -j16\n"
108+
"rm -rf cmake-out && mkdir cmake-out && (cd cmake-out && cmake -DBUCK2=<path-to-buck2> -DCMAKE_PREFIX_PATH=$TORCH_PACKAGE_DIR -DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON ..) && cmake --build . -j16\n"
109109
'To find the location of the lib: find cmake-out -name "libquantized_ops_aot_lib*"\n'
110110
"Then specify the said library via -s <path to libquantized_ops_aot_lib.so>\n"
111111
)

kernels/quantized/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
# ~~~
1111
cmake_minimum_required(VERSION 3.19)
1212

13+
option(EXECUTORCH_BUILD_QUANTIZED_OPS_AOT
14+
"Build the quantized ops library for AOT export usage" OFF)
15+
1316
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
1417
if(NOT CMAKE_CXX_STANDARD)
1518
set(CMAKE_CXX_STANDARD 17)
@@ -49,7 +52,7 @@ message("Generated files ${gen_command_sources}")
4952
# quantized_ops_aot_lib quantized_ops_lib but none of these is a common
5053
# dependency of the other(s). This is not allowed by the Xcode "new build
5154
# system".
52-
if(NOT CMAKE_GENERATOR STREQUAL "Xcode")
55+
if(NOT CMAKE_GENERATOR STREQUAL "Xcode" AND EXECUTORCH_BUILD_QUANTIZED_OPS_AOT)
5356
# Build an AOT library to register quantized ops into PyTorch. This is a hack.
5457
set(_quantized_sources
5558
${_quantized_kernels__srcs}

0 commit comments

Comments
 (0)