
Commit 7626b7b
Update on "[ET-VK] Enable additional specialization constants in compute shaders"
## Context

Building on top of the previous changeset in the stack, this changeset modifies shader dispatch APIs to accept additional specialization constants for a shader.

Differential Revision: [D56225042](https://our.internmc.facebook.com/intern/diff/D56225042/)

[ghstack-poisoned]
2 parents: bdc7896 + 44f704e

File tree: 29 files changed (+435, −64 lines)

.ci/scripts/test_llama.sh

Lines changed: 17 additions & 6 deletions
@@ -12,7 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 MODEL_NAME=$1 # stories110M.pt
 BUILD_TOOL=$2 # buck2 or cmake
 DTYPE=$3 # fp16 or fp32
-MODE=${4:-"xnnpack"} # portable or xnnpack
+MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
 if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
   echo "Expecting atleast 4 positional arguments"
   echo "Usage: [...]"
@@ -37,7 +37,7 @@ if [[ -z "${MODE:-}" ]]; then
   exit 1
 fi

-if [[ "${MODE}" =~ xnnpack.* ]]; then
+if [[ "${MODE}" =~ .*xnnpack.* ]]; then
   XNNPACK=ON
 else
   XNNPACK=OFF
@@ -49,6 +49,12 @@ else
   CUSTOM=OFF
 fi

+if [[ "${MODE}" =~ .*qe.* ]]; then
+  QE=ON
+else
+  QE=OFF
+fi
+
 if [[ -z "${BUCK:-}" ]]; then
   BUCK=buck2
 fi
@@ -84,7 +90,6 @@ cmake_build_llama_runner() {
     -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out/${dir} \
     ${dir}
@@ -126,9 +131,15 @@ fi
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
-EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
-if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
+EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
+if [[ "${XNNPACK}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
+fi
+if [[ "${CUSTOM}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
+fi
+if [[ "${QE}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
 fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}

.ci/scripts/test_quantized_aot_lib.sh

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ build_cmake_quantized_aot_lib() {
     && retry cmake -DBUCK2=buck2 \
       -DCMAKE_BUILD_TYPE=Release \
       -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-      -DEXECUTORCH_BUILD_QUANTIZED=ON \
+      -DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \
       -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)

  cmake --build ${CMAKE_OUTPUT_DIR} -j4

.github/workflows/android.yml

Lines changed: 3 additions & 2 deletions
@@ -10,7 +10,8 @@ on:
       - .ci/docker/**
       - .github/workflows/android.yml
       - install_requirements.sh
-      - examples/demo-apps/**
+      - examples/demo-apps/android/**
+      - extension/android/**
       - extension/module/**
   workflow_dispatch:
@@ -101,7 +102,7 @@ jobs:
       android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/app-debug.apk
       android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/app-debug-androidTest.apk
       # The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
-      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/414cb54d-4d83-4576-8317-93244e4dc50e
+      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
       # The exported llama2 model and its tokenizer, can be downloaded from https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b.zip.
       # Among the input, this is the biggest file and uploading it to AWS beforehand makes the test run much faster
       extra-data: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/bd15825b-ddab-4e47-9fef-a9c8935778dd

.github/workflows/doc-build.yml

Lines changed: 7 additions & 0 deletions
@@ -68,6 +68,13 @@ jobs:
           make html
           cd ..

+          # If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.
+          GITHUB_REF=${{ github.ref }}
+          echo "GitHub Ref: ${GITHUB_REF}"
+          if [[ "${{ github.ref }}" == 'refs/heads/main' ]]; then
+            find docs/_build/html/ -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">';
+          fi
+
           cp -rf docs/_build/html/* "${RUNNER_DOCS_DIR}"

           mv docs/_build/html "${RUNNER_ARTIFACT_DIR}"

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack+kv+custom]
+        mode: [portable, xnnpack+custom, xnnpack+custom+qe]
       fail-fast: false
     with:
       runner: linux.2xlarge

CMakeLists.txt

Lines changed: 2 additions & 11 deletions
@@ -164,8 +164,6 @@ option(EXECUTORCH_BUILD_QNN "Build the Qualcomm backend" OFF)

 option(EXECUTORCH_BUILD_OPTIMIZED "Build the optimized kernels" OFF)

-option(EXECUTORCH_BUILD_QUANTIZED "Build the quantized kernels" OFF)
-
 option(EXECUTORCH_BUILD_SDK "Build the ExecuTorch SDK")

 option(EXECUTORCH_BUILD_SIZE_TEST "Build the size test" OFF)
@@ -413,9 +411,7 @@ if(EXECUTORCH_BUILD_OPTIMIZED)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
 endif()

-if(EXECUTORCH_BUILD_QUANTIZED)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
-endif()
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)

 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)

@@ -445,19 +441,14 @@ cmake_dependent_option(
   EXECUTORCH_BUILD_HOST_TARGETS OFF)
 if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   # Baseline libraries that executor_runner will link against.
-  set(_executor_runner_libs executorch gflags)
+  set(_executor_runner_libs executorch gflags quantized_ops_lib)

   if(EXECUTORCH_BUILD_OPTIMIZED)
     list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
   else()
     list(APPEND _executor_runner_libs portable_ops_lib)
   endif()

-  # Generate lib to register quantized ops
-  if(EXECUTORCH_BUILD_QUANTIZED)
-    list(APPEND _executor_runner_libs quantized_ops_lib)
-  endif()
-
   add_executable(executor_runner ${_executor_runner__srcs})
   if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
     target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")

backends/vulkan/runtime/api/Context.h

Lines changed: 3 additions & 3 deletions
@@ -495,7 +495,7 @@ inline bool Context::submit_compute_job(
     PipelineBarrier& pipeline_barrier,
     const utils::uvec3& global_work_group,
     const utils::uvec3& local_work_group_size,
-    const SpecVarList& specialization,
+    const SpecVarList& specialization_constants,
     VkFence fence_handle,
     Arguments&&... arguments) {
   // If any of the provided arguments does not have memory associated with it,
@@ -538,8 +538,8 @@
 #endif /* USE_VULKAN_GPU_DIAGNOSTICS */

   // Factor out template parameter independent code to minimize code bloat.
-  DescriptorSet descriptor_set =
-      get_descriptor_set(shader, local_work_group_size, specialization);
+  DescriptorSet descriptor_set = get_descriptor_set(
+      shader, local_work_group_size, specialization_constants);

   detail::bind(
       descriptor_set,
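For orientation, a minimal sketch of a call site under the renamed parameter, modeled on the test code later in this commit; the kernel name and workgroup sizes are placeholders, not code from the diff:

api::PipelineBarrier pipeline_barrier{};
api::SpecVarList specialization_constants = {}; // empty list; real constants would be listed here

api::context()->submit_compute_job(
    VK_KERNEL_FROM_STR("my_kernel"), // hypothetical shader name
    pipeline_barrier,
    {64, 64, 1},              // global work group
    {8, 8, 1},                // local work group size
    specialization_constants, // forwarded to get_descriptor_set()
    VK_NULL_HANDLE);          // no fence; buffer/image arguments would follow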

backends/vulkan/runtime/api/Pipeline.cpp

Lines changed: 2 additions & 4 deletions
@@ -157,9 +157,7 @@ bool operator==(const SpecVar& lhs, const SpecVar& rhs) {
   return false;
 }

-SpecVarList::SpecVarList() {
-  vars.reserve(8);
-}
+SpecVarList::SpecVarList() {}

 SpecVarList::SpecVarList(std::initializer_list<SpecVar> init_list) {
   vars.resize(init_list.size());
@@ -176,7 +174,7 @@ std::vector<VkSpecializationMapEntry> SpecVarList::generate_map_entries()
   map_entries.resize(vars.size());
   uint32_t cur_offset = 0u;
   for (uint32_t i = 0; i < vars.size(); ++i) {
-    map_entries[i] = {
+    map_entries.at(i) = {
         i, cur_offset + vars.at(i).val_offset(), vars.at(i).val_size()};
     cur_offset += sizeof(SpecVar);
   }
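Each map entry points back into the SpecVar array itself: entry i covers the value bytes of vars[i], and cur_offset advances by sizeof(SpecVar) per element, so the array doubles as the specialization data blob. As a hedged sketch (not code from this diff), this is how such entries are typically consumed when a Vulkan compute pipeline is created; the dataSize expression assumes the blob is the contiguous SpecVar array:

std::vector<VkSpecializationMapEntry> entries = spec_vars.generate_map_entries();

VkSpecializationInfo spec_info{};
spec_info.mapEntryCount = static_cast<uint32_t>(entries.size());
spec_info.pMapEntries = entries.data();
spec_info.dataSize = spec_vars.size() * sizeof(api::SpecVar); // assumed blob size
spec_info.pData = spec_vars.data(); // the SpecVar array backs the constant data

// &spec_info is then supplied via VkPipelineShaderStageCreateInfo::pSpecializationInfo
// when the compute pipeline is built.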

backends/vulkan/runtime/api/Pipeline.h

Lines changed: 7 additions & 2 deletions
@@ -53,12 +53,17 @@ struct SpecVar final {

 bool operator==(const SpecVar& lhs, const SpecVar& rhs);

-struct SpecVarList final {
+class SpecVarList final {
   std::vector<SpecVar> vars;

+ public:
   SpecVarList();
   SpecVarList(std::initializer_list<SpecVar> init_list);

+  inline const SpecVar& at(const size_t index) const {
+    return vars.at(index);
+  }
+
   inline const SpecVar* data() const {
     return vars.data();
   }
@@ -235,7 +240,7 @@ class ComputePipelineCache final {
     seed = utils::hash_combine(seed, std::hash<uint32_t>()(spec_vars.size()));

     for (int i = 0; i < spec_vars.size(); ++i) {
-      const SpecVar& spec_var = spec_vars.vars.at(i);
+      const SpecVar& spec_var = spec_vars.at(i);
       size_t new_seed = 0;
       switch (spec_var.type) {
         case SpecVar::Type::FLOAT:
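The new at() accessor keeps the pipeline-cache key hashing working now that vars is private. utils::hash_combine itself does not appear in this diff; for illustration only, a common boost-style formulation folds each value into the running seed like this:

inline size_t hash_combine(size_t seed, size_t value) {
  // 0x9e3779b9 is the 32-bit golden-ratio constant, used to spread the bits.
  return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}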

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 9 additions & 4 deletions
@@ -21,13 +21,14 @@ void record_nchw_to_image_op(
     api::VulkanBuffer& src_buffer,
     vTensor& v_dst) {
   api::PipelineBarrier pipeline_barrier{};
+  api::SpecVarList specialization_constants = {};

   context->submit_compute_job(
       get_nchw_to_image_shader(v_dst),
       pipeline_barrier,
       v_dst.virtual_extents(),
       adaptive_work_group_size(v_dst.virtual_extents()),
-      {},
+      specialization_constants,
       VK_NULL_HANDLE,
       v_dst.image(
           pipeline_barrier,
@@ -43,12 +44,14 @@ void record_image_to_nchw_op(
     vTensor& v_src,
     api::VulkanBuffer& dst_buffer) {
   api::PipelineBarrier pipeline_barrier{};
+  api::SpecVarList specialization_constants = {};
+
   context->submit_compute_job(
       get_image_to_nchw_shader(v_src),
       pipeline_barrier,
       v_src.virtual_extents(),
       adaptive_work_group_size(v_src.virtual_extents()),
-      {},
+      specialization_constants,
       VK_NULL_HANDLE,
       v_src.image(pipeline_barrier, api::PipelineStage::COMPUTE),
       dst_buffer,
@@ -80,12 +83,13 @@ void record_conv2d_prepack_weights_op(
   api::UniformParamsBuffer padded_sizes_ubo(
       context, api::utils::make_ivec2(padded_sizes, /*reverse = */ true));

+  api::SpecVarList specialization_constants = {};
   context->submit_compute_job(
       shader,
       pipeline_barrier,
       v_dst.virtual_extents(),
       adaptive_work_group_size(v_dst.virtual_extents()),
-      {},
+      specialization_constants,
       VK_NULL_HANDLE,
       v_dst.image(
           pipeline_barrier,
@@ -107,12 +111,13 @@ void record_binary_op(
   add_dtype_suffix(kernel_name, v_dst);

   api::PipelineBarrier pipeline_barrier{};
+  api::SpecVarList specialization_constants = {};
   context->submit_compute_job(
       VK_KERNEL_FROM_STR(kernel_name),
       pipeline_barrier,
       v_dst.virtual_extents(),
       adaptive_work_group_size(v_dst.virtual_extents()),
-      {},
+      specialization_constants,
       VK_NULL_HANDLE,
       v_dst.image(
           pipeline_barrier,

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 6 additions & 3 deletions
@@ -153,12 +153,13 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {

   {
     api::PipelineBarrier pipeline_barrier{};
+    api::SpecVarList specialization_constants = {};
     api::context()->submit_compute_job(
         VK_KERNEL_FROM_STR(kernel_name),
         pipeline_barrier,
         {4, 4, 4},
         {4, 4, 4},
-        {},
+        specialization_constants,
         VK_NULL_HANDLE,
         a.image(
             pipeline_barrier,
@@ -213,12 +214,13 @@ void test_storage_buffer_type(const size_t len) {
   {
     uint32_t len_div4 = api::utils::div_up(uint32_t(len), uint32_t(4));
     api::PipelineBarrier pipeline_barrier{};
+    api::SpecVarList specialization_constants = {};
     api::context()->submit_compute_job(
         VK_KERNEL_FROM_STR(kernel_name),
         pipeline_barrier,
         {64, 1, 1},
         {len_div4, 1, 1},
-        {},
+        specialization_constants,
         VK_NULL_HANDLE,
         buffer.buffer(),
         params.buffer());
@@ -909,12 +911,13 @@ void run_from_gpu_test(

   {
     api::PipelineBarrier pipeline_barrier{};
+    api::SpecVarList specialization_constants = {};
     api::context()->submit_compute_job(
         VK_KERNEL_FROM_STR(kernel_name),
         pipeline_barrier,
         vten.virtual_extents(),
         {4, 4, 4},
-        {},
+        specialization_constants,
         VK_NULL_HANDLE,
         vten.image(
             pipeline_barrier,
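The same mechanical change repeats across the tests: the inline {} argument becomes a named, default-constructed SpecVarList. One plausible reading (an assumption, not stated in the commit message) is that the named variable makes the empty specialization-constant list explicit and gives each test a single place to populate constants later:

api::SpecVarList specialization_constants = {};
// specialization_constants = { /* SpecVar values */ }; // hypothetical: set when a test needs real constants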

build/Utils.cmake

Lines changed: 0 additions & 2 deletions
@@ -74,8 +74,6 @@ function(executorch_print_configuration_summary)
       STATUS " EXECUTORCH_BUILD_QNN : ${EXECUTORCH_BUILD_QNN}")
   message(STATUS " EXECUTORCH_BUILD_OPTIMIZED : "
       "${EXECUTORCH_BUILD_OPTIMIZED}")
-  message(STATUS " EXECUTORCH_BUILD_QUANTIZED : "
-      "${EXECUTORCH_BUILD_QUANTIZED}")
   message(
       STATUS " EXECUTORCH_BUILD_SDK : ${EXECUTORCH_BUILD_SDK}")
   message(

build/build_apple_frameworks.sh

Lines changed: 2 additions & 5 deletions
@@ -22,7 +22,7 @@ CUSTOM=OFF
 MPS=OFF
 OPTIMIZED=OFF
 PORTABLE=OFF
-QUANTIZED=OFF
+QUANTIZED=ON
 XNNPACK=OFF
 HEADERS_PATH="include"
 EXECUTORCH_FRAMEWORK="executorch:libexecutorch.a,libexecutorch_no_prim_ops.a,libextension_apple.a,libextension_data_loader.a,libextension_module.a:$HEADERS_PATH"
@@ -51,7 +51,6 @@ usage() {
   echo " --mps Include this flag to build the Metal Performance Shaders backend."
   echo " --optimized Include this flag to build the Optimized backend."
   echo " --portable Include this flag to build the Portable backend."
-  echo " --quantized Include this flag to build the Quantized backend."
   echo " --xnnpack Include this flag to build the XNNPACK backend."
   echo
   echo "Example:"
@@ -74,7 +73,6 @@ for arg in "$@"; do
     --mps) MPS=ON ;;
     --optimized) OPTIMIZED=ON ;;
     --portable) PORTABLE=ON ;;
-    --quantized) QUANTIZED=ON ;;
     --xnnpack) XNNPACK=ON ;;
     *)
       if [[ -z "$SOURCE_ROOT_DIR" ]]; then
@@ -137,7 +135,6 @@ cmake_build() {
     -DEXECUTORCH_BUILD_CUSTOM=$CUSTOM \
     -DEXECUTORCH_BUILD_MPS=$MPS \
     -DEXECUTORCH_BUILD_OPTIMIZED=$OPTIMIZED \
-    -DEXECUTORCH_BUILD_QUANTIZED=$QUANTIZED \
     -DEXECUTORCH_BUILD_XNNPACK=$XNNPACK \
     ${platform_flag:+-DIOS_PLATFORM=$platform_flag}
   cmake --build . --config $MODE
@@ -181,7 +178,7 @@ append_framework_flag "$CUSTOM" "$CUSTOM_FRAMEWORK"
 append_framework_flag "$MPS" "$MPS_FRAMEWORK"
 append_framework_flag "$OPTIMIZED" "$OPTIMIZED_FRAMEWORK"
 append_framework_flag "$PORTABLE" "$PORTABLE_FRAMEWORK"
-append_framework_flag "$QUANTIZED" "$QUANTIZED_FRAMEWORK"
+append_framework_flag "ON" "$QUANTIZED_FRAMEWORK"
 append_framework_flag "$XNNPACK" "$XNNPACK_FRAMEWORK"

 "$SOURCE_ROOT_DIR"/build/create_frameworks.sh "${FRAMEWORK_FLAGS[@]}"

build/executorch-config.cmake

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ set(lib_list
     etdump bundled_program extension_data_loader ${FLATCCRT_LIB} mpsdelegate
     qnn_executorch_backend portable_ops_lib extension_module xnnpack_backend
     XNNPACK cpuinfo pthreadpool vulkan_backend optimized_kernels cpublas eigen_blas
-    optimized_ops_lib optimized_native_cpu_ops_lib
+    optimized_ops_lib optimized_native_cpu_ops_lib quantized_kernels quantized_ops_lib
 )
 foreach(lib ${lib_list})
   # Name of the variable which stores result of the find_library search
