Enable embedding quant ops in runner (#423)

kimishpatel · malfet · commit f93fd8010fd0 · 2024-07-16T23:03:13.000-07:00
Summary:
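Link the runner against the quantized kernels and the quantized ops lib
(the latter whole-archive linked) so embedding quant ops are available.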

Test Plan:
python torchchat.py download stories15M
export PRMT="Once upon a time in a land far away"
python torchchat.py export stories15M \
  --quant '{"linear:a8w4dq" : {"groupsize": 32}, "embedding" : {"bitwidth": 8, "groupsize": 0}}' \
  --output-pte-path ./model.pte
./scripts/install_et.sh
rm -rf build/cmake-out/
cmake -S ./runner-et -B ./runner-et/cmake-out -G Ninja
cmake --build ./runner-et/cmake-out
./runner-et/cmake-out/run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}"

Reviewers:

Subscribers:

Tasks:

Tags:
diff --git a/runner-et/CMakeLists.txt b/runner-et/CMakeLists.txt
@@ -43,17 +43,20 @@ target_link_libraries(
         extension_module
         ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
         optimized_kernels
+        quantized_kernels
         portable_kernels
         cpublas
         eigen_blas
         # The libraries below need to be whole-archived linked
         optimized_native_cpu_ops_lib
+        quantized_ops_lib
         xnnpack_backend
         XNNPACK
         pthreadpool
         cpuinfo
 )
 target_link_options_shared_lib(optimized_native_cpu_ops_lib)
+target_link_options_shared_lib(quantized_ops_lib)
 target_link_options_shared_lib(xnnpack_backend)
 # Not clear why linking executorch as whole-archive outside android/apple is leading
 # to double registration. Most likely because of linkage issues.