
Commit 41abbb5

Update on "Add quantized op support to llama runner"
Summary:
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:

Differential Revision: [D56197863](https://our.internmc.facebook.com/intern/diff/D56197863)

[ghstack-poisoned]
2 parents 3291cb7 + bfcf4c0

File tree

- .ci/scripts/test_llama.sh
- CMakeLists.txt
- build/Utils.cmake
- build/build_apple_frameworks.sh
- examples/models/llama2/CMakeLists.txt
- examples/models/llama2/runner/targets.bzl

6 files changed, +24 -43 lines changed

.ci/scripts/test_llama.sh

Lines changed: 0 additions & 2 deletions

```diff
@@ -75,7 +75,6 @@ cmake_install_executorch_libraries() {
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_QUANTIZED="$QE" \
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .
@@ -91,7 +90,6 @@ cmake_build_llama_runner() {
     -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-    -DEXECUTORCH_BUILD_QUANTIZED="$QE" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out/${dir} \
     ${dir}
```
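The net effect on the CI configure step is that quantized kernels no longer need an explicit opt-in. A minimal sketch of the resulting cmake_install_executorch_libraries() invocation after this change, using only flags visible in the hunk above ($CUSTOM, $XNNPACK, and $PYTHON_EXECUTABLE are the script's existing variables):

```bash
# Post-change configure call: no -DEXECUTORCH_BUILD_QUANTIZED flag, since the
# quantized kernels are now always built.
cmake -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
      -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
      -DEXECUTORCH_BUILD_OPTIMIZED=ON \
      -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
      -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
      -Bcmake-out .
```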

CMakeLists.txt

Lines changed: 2 additions & 11 deletions

```diff
@@ -164,8 +164,6 @@ option(EXECUTORCH_BUILD_QNN "Build the Qualcomm backend" OFF)

 option(EXECUTORCH_BUILD_OPTIMIZED "Build the optimized kernels" OFF)

-option(EXECUTORCH_BUILD_QUANTIZED "Build the quantized kernels" OFF)
-
 option(EXECUTORCH_BUILD_SDK "Build the ExecuTorch SDK")

 option(EXECUTORCH_BUILD_SIZE_TEST "Build the size test" OFF)
@@ -413,9 +411,7 @@ if(EXECUTORCH_BUILD_OPTIMIZED)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
 endif()

-if(EXECUTORCH_BUILD_QUANTIZED)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
-endif()
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)

 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)

@@ -445,19 +441,14 @@ cmake_dependent_option(
   EXECUTORCH_BUILD_HOST_TARGETS OFF)
 if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   # Baseline libraries that executor_runner will link against.
-  set(_executor_runner_libs executorch gflags)
+  set(_executor_runner_libs executorch gflags quantized_ops_lib)

   if(EXECUTORCH_BUILD_OPTIMIZED)
     list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
   else()
     list(APPEND _executor_runner_libs portable_ops_lib)
   endif()

-  # Generate lib to register quantized ops
-  if(EXECUTORCH_BUILD_QUANTIZED)
-    list(APPEND _executor_runner_libs quantized_ops_lib)
-  endif()
-
   add_executable(executor_runner ${_executor_runner__srcs})
   if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
     target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
```
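With kernels/quantized added unconditionally and quantized_ops_lib now part of the baseline _executor_runner_libs, a plain runner build links the quantized kernels with no extra flag. A minimal sketch, assuming a top-level configure into cmake-out:

```bash
# Configure and build executor_runner; quantized_ops_lib is linked in by default.
cmake -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON -Bcmake-out .
cmake --build cmake-out --target executor_runner
```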

build/Utils.cmake

Lines changed: 0 additions & 2 deletions

```diff
@@ -74,8 +74,6 @@ function(executorch_print_configuration_summary)
     STATUS "  EXECUTORCH_BUILD_QNN : ${EXECUTORCH_BUILD_QNN}")
   message(STATUS "  EXECUTORCH_BUILD_OPTIMIZED : "
                  "${EXECUTORCH_BUILD_OPTIMIZED}")
-  message(STATUS "  EXECUTORCH_BUILD_QUANTIZED : "
-                 "${EXECUTORCH_BUILD_QUANTIZED}")
   message(
     STATUS "  EXECUTORCH_BUILD_SDK : ${EXECUTORCH_BUILD_SDK}")
   message(
```

build/build_apple_frameworks.sh

Lines changed: 2 additions & 5 deletions

```diff
@@ -22,7 +22,7 @@ CUSTOM=OFF
 MPS=OFF
 OPTIMIZED=OFF
 PORTABLE=OFF
-QUANTIZED=OFF
+QUANTIZED=ON
 XNNPACK=OFF
 HEADERS_PATH="include"
 EXECUTORCH_FRAMEWORK="executorch:libexecutorch.a,libexecutorch_no_prim_ops.a,libextension_apple.a,libextension_data_loader.a,libextension_module.a:$HEADERS_PATH"
@@ -51,7 +51,6 @@ usage() {
   echo "  --mps Include this flag to build the Metal Performance Shaders backend."
   echo "  --optimized Include this flag to build the Optimized backend."
   echo "  --portable Include this flag to build the Portable backend."
-  echo "  --quantized Include this flag to build the Quantized backend."
   echo "  --xnnpack Include this flag to build the XNNPACK backend."
   echo
   echo "Example:"
@@ -74,7 +73,6 @@ for arg in "$@"; do
     --mps) MPS=ON ;;
     --optimized) OPTIMIZED=ON ;;
     --portable) PORTABLE=ON ;;
-    --quantized) QUANTIZED=ON ;;
     --xnnpack) XNNPACK=ON ;;
     *)
       if [[ -z "$SOURCE_ROOT_DIR" ]]; then
@@ -137,7 +135,6 @@ cmake_build() {
     -DEXECUTORCH_BUILD_CUSTOM=$CUSTOM \
     -DEXECUTORCH_BUILD_MPS=$MPS \
     -DEXECUTORCH_BUILD_OPTIMIZED=$OPTIMIZED \
-    -DEXECUTORCH_BUILD_QUANTIZED=$QUANTIZED \
     -DEXECUTORCH_BUILD_XNNPACK=$XNNPACK \
     ${platform_flag:+-DIOS_PLATFORM=$platform_flag}
   cmake --build . --config $MODE
@@ -181,7 +178,7 @@ append_framework_flag "$CUSTOM" "$CUSTOM_FRAMEWORK"
 append_framework_flag "$MPS" "$MPS_FRAMEWORK"
 append_framework_flag "$OPTIMIZED" "$OPTIMIZED_FRAMEWORK"
 append_framework_flag "$PORTABLE" "$PORTABLE_FRAMEWORK"
-append_framework_flag "$QUANTIZED" "$QUANTIZED_FRAMEWORK"
+append_framework_flag "ON" "$QUANTIZED_FRAMEWORK"
 append_framework_flag "$XNNPACK" "$XNNPACK_FRAMEWORK"

 "$SOURCE_ROOT_DIR"/build/create_frameworks.sh "${FRAMEWORK_FLAGS[@]}"
```

examples/models/llama2/CMakeLists.txt

Lines changed: 19 additions & 22 deletions

```diff
@@ -120,28 +120,25 @@ else()
   target_link_options_shared_lib(portable_ops_lib)
 endif()

-if(EXECUTORCH_BUILD_QUANTIZED)
-  # TODO(larryliu0820): after we delete llama_quantized ops we should be able to reuse
-  # quantized_kernels and quantized_ops_lib directly.
-  merge_yaml(
-    FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/ops/quantized.yaml
-    FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/quantized/quantized.yaml
-    OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
-
-  gen_selected_ops("${CMAKE_CURRENT_BINARY_DIR}/merged.yaml" "" "")
-  generate_bindings_for_kernels(
-    FUNCTIONS_YAML ${CMAKE_CURRENT_BINARY_DIR}/merged.yaml)
-  message("Generated files ${gen_command_sources}")
-
-  # quantized_merge_ops_lib: Register quantized op kernels into the runtime
-  gen_operators_lib(
-    "quantized_merge_ops_lib"
-    KERNEL_LIBS quantized_kernels
-    DEPS executorch)
-  target_include_directories(quantized_merge_ops_lib PUBLIC ${_common_include_directories})
-  target_link_options_shared_lib(quantized_merge_ops_lib)
-  list(APPEND link_libraries quantized_kernels quantized_merge_ops_lib)
-endif()
+# quantized ops yaml file operation
+merge_yaml(
+  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/ops/quantized.yaml
+  FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/quantized/quantized.yaml
+  OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+gen_selected_ops("${CMAKE_CURRENT_BINARY_DIR}/merged.yaml" "" "")
+generate_bindings_for_kernels(
+  FUNCTIONS_YAML ${CMAKE_CURRENT_BINARY_DIR}/merged.yaml)
+message("Generated files ${gen_command_sources}")
+
+# quantized_merge_ops_lib: Register quantized op kernels into the runtime
+gen_operators_lib(
+  "quantized_merge_ops_lib"
+  KERNEL_LIBS quantized_kernels
+  DEPS executorch)
+target_include_directories(quantized_merge_ops_lib PUBLIC ${_common_include_directories})
+target_link_options_shared_lib(quantized_merge_ops_lib)
+list(APPEND link_libraries quantized_kernels quantized_merge_ops_lib)

 if(EXECUTORCH_BUILD_CUSTOM)
   target_link_options_shared_lib(custom_ops)
```
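Because this codegen block now runs unconditionally, quantized_merge_ops_lib is always generated when the llama runner example is configured (as in the cmake_build_llama_runner step shown earlier). A minimal sketch of building just that target, assuming the example was configured into cmake-out/examples/models/llama2:

```bash
# Build only the merged quantized ops library; the target now exists regardless
# of any quantization flag. The build directory below is an assumed example.
cmake --build cmake-out/examples/models/llama2 --target quantized_merge_ops_lib
```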

examples/models/llama2/runner/targets.bzl

Lines changed: 1 addition & 1 deletion

```diff
@@ -4,7 +4,7 @@ def _get_operator_lib(aten = False):
     if aten:
        return ["//executorch/kernels/aten:generated_lib"]
     elif runtime.is_oss:
-        return ["//executorch/kernels/portable:generated_lib", "//executorch/examples/models/llama2/custom_ops:custom_ops"]
+        return ["//executorch/kernels/portable:generated_lib", "//executorch/examples/models/llama2/custom_ops:custom_ops", "//executorch/examples/models/llama2/ops:generated_lib"]
     else:
         return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/examples/models/llama2/custom_ops:custom_ops", "//executorch/examples/models/llama2/ops:generated_lib"]

```
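In OSS Buck builds, the operator library set returned for the runner now includes the llama2 ops generated_lib. A hypothetical check of that dependency edge (only the dep labels come from the diff; the runner target name is a guess):

```bash
# List the runner's transitive deps and look for the llama2 quantized ops lib.
# "//executorch/examples/models/llama2/runner:runner" is an assumed target name.
buck2 cquery "deps(//executorch/examples/models/llama2/runner:runner)" \
  | grep "examples/models/llama2/ops"
```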
