Skip to content

Commit 2f64709

Browse files
committed
quantized embedding
1 parent d0192e5 commit 2f64709

File tree

6 files changed

+80
-41
lines changed

6 files changed

+80
-41
lines changed

CMakeLists.txt

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -540,10 +540,6 @@ if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
540540
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
541541
endif()
542542

543-
if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
544-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
545-
endif()
546-
547543
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
548544

549545
#
@@ -574,30 +570,6 @@ cmake_dependent_option(
574570
EXECUTORCH_BUILD_EXECUTOR_RUNNER "Build the executor_runner executable" ON
575571
EXECUTORCH_BUILD_HOST_TARGETS OFF
576572
)
577-
if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
578-
# Baseline libraries that executor_runner will link against.
579-
set(_executor_runner_libs executorch gflags)
580-
581-
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
582-
list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
583-
elseif(EXECUTORCH_BUILD_CADENCE)
584-
list(APPEND _executor_runner_libs cadence_ops_lib)
585-
else()
586-
list(APPEND _executor_runner_libs portable_ops_lib)
587-
endif()
588-
589-
# Generate lib to register quantized ops
590-
if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
591-
list(APPEND _executor_runner_libs quantized_ops_lib)
592-
endif()
593-
594-
add_executable(executor_runner ${_executor_runner__srcs})
595-
if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
596-
target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
597-
endif()
598-
target_link_libraries(executor_runner ${_executor_runner_libs})
599-
target_compile_options(executor_runner PUBLIC ${_common_compile_options})
600-
endif()
601573

602574
# Add googletest if any test targets should be built
603575
if(EXECUTORCH_BUILD_GTESTS)
@@ -698,10 +670,6 @@ if(EXECUTORCH_BUILD_PYBIND)
698670
list(APPEND _dep_libs xnnpack_backend XNNPACK)
699671
endif()
700672

701-
if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
702-
target_link_options_shared_lib(quantized_ops_lib)
703-
endif()
704-
705673
# compile options for pybind
706674
set(_pybind_compile_options
707675
-Wno-deprecated-declarations
@@ -787,5 +755,34 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
787755
)
788756
endif()
789757

758+
if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
759+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
760+
target_link_options_shared_lib(quantized_ops_lib)
761+
endif()
762+
763+
if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
764+
# Baseline libraries that executor_runner will link against.
765+
set(_executor_runner_libs executorch gflags)
766+
767+
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
768+
list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
769+
elseif(EXECUTORCH_BUILD_CADENCE)
770+
list(APPEND _executor_runner_libs cadence_ops_lib)
771+
else()
772+
list(APPEND _executor_runner_libs portable_ops_lib)
773+
endif()
774+
775+
# Generate lib to register quantized ops
776+
if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
777+
list(APPEND _executor_runner_libs quantized_ops_lib)
778+
endif()
779+
780+
add_executable(executor_runner ${_executor_runner__srcs})
781+
if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
782+
target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
783+
endif()
784+
target_link_libraries(executor_runner ${_executor_runner_libs})
785+
target_compile_options(executor_runner PUBLIC ${_common_compile_options})
786+
endif()
790787
# Print all summary
791788
executorch_print_configuration_summary()

build/Codegen.cmake

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,13 @@ function(gen_custom_ops_aot_lib)
150150
include(${EXECUTORCH_ROOT}/build/Utils.cmake)
151151

152152
target_link_options_shared_lib(${GEN_LIB_NAME})
153-
if(EXECUTORCH_BUILD_PYBIND AND APPLE)
154-
target_link_libraries(${GEN_LIB_NAME} PRIVATE executorch_no_prim_ops)
155-
target_link_options(${GEN_LIB_NAME} PRIVATE -undefined dynamic_lookup)
153+
if(TARGET portable_lib)
154+
target_link_libraries(${GEN_LIB_NAME} PRIVATE portable_lib)
155+
# target_link_libraries(${GEN_LIB_NAME} PRIVATE executorch_no_prim_ops)
156+
# target_link_options(${GEN_LIB_NAME} PRIVATE -undefined dynamic_lookup)
156157
else()
157-
target_link_libraries(${GEN_LIB_NAME} PRIVATE executorch_no_prim_ops)
158+
target_link_libraries(${GEN_LIB_NAME} PRIVATE executorch_no_prim_ops) # fallback: portable_lib is not a target in this branch
159+
# target_link_libraries(${GEN_LIB_NAME} PRIVATE executorch_no_prim_ops)
158160
endif()
159161
endfunction()
160162

examples/models/llama2/runner/generation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626

2727
from executorch.extension.pybindings.portable_lib import _load_for_executorch
2828

29+
30+
from executorch.exir.passes._quant_patterns_and_replacements import ( # noqa
31+
quantized_decomposed_lib,
32+
)
2933

3034
class CompletionPrediction(TypedDict, total=False):
3135
generation: str

install_requirements.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ $PIP_EXECUTABLE install --extra-index-url "${TORCH_NIGHTLY_URL}" \
165165
# The --extra-index-url may be necessary if pyproject.toml has a dependency on a
166166
# pre-release or nightly version of a torch package.
167167
#
168-
168+
CMAKE_ARGS="$CMAKE_ARGS -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
169169
EXECUTORCH_BUILD_PYBIND="${EXECUTORCH_BUILD_PYBIND}" \
170170
CMAKE_ARGS="${CMAKE_ARGS}" \
171171
CMAKE_BUILD_ARGS="${CMAKE_BUILD_ARGS}" \

kernels/quantized/CMakeLists.txt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,39 @@ if(NOT CMAKE_GENERATOR STREQUAL "Xcode"
8484
${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp
8585
${EXECUTORCH_ROOT}/runtime/core/exec_aten/util/tensor_util_aten.cpp
8686
)
87+
88+
# Build aot library for embedding ops. Used for pybindings only.
89+
# set(_embedding_aot_ops
90+
# "quantized_decomposed::embedding_4bit.dtype_out"
91+
# )
92+
93+
# gen_selected_ops(
94+
# LIB_NAME "embedding_ops_aot_lib" ROOT_OPS ${_embedding_aot_ops}
95+
# )
96+
97+
# # Expect gen_selected_ops output file to be
98+
# # embedding_ops_aot_lib/selected_operators.yaml
99+
# generate_bindings_for_kernels(
100+
# LIB_NAME "embedding_ops_aot_lib" CUSTOM_OPS_YAML "${_yaml_file}"
101+
# )
102+
# set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/embedding_ops_aot_lib)
103+
104+
# add_library(
105+
# embedding_ops_aot_lib
106+
# ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp
107+
# ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h
108+
# )
109+
110+
# target_link_libraries(embedding_ops_aot_lib PRIVATE executorch_no_prim_ops)
111+
# list(APPEND _quantized_sources ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp ${_out_dir}/Functions.h)
87112
gen_custom_ops_aot_lib(
88113
LIB_NAME "quantized_ops_aot_lib" KERNEL_SOURCES "${_quantized_sources}"
89114
)
115+
target_include_directories(
116+
quantized_ops_aot_lib PUBLIC "${_common_include_directories}"
117+
)
118+
119+
# target_link_libraries(quantized_ops_aot_lib PUBLIC embedding_ops_aot_lib)
90120
endif()
91121
endif()
92122

kernels/quantized/__init__.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,21 @@
77
try:
88
from pathlib import Path
99

10-
libs = list(Path(__file__).parent.resolve().glob("**/libquantized_ops_aot_lib.*"))
10+
quantized_ops_aot_lib = list(Path(__file__).parent.resolve().glob("**/libquantized_ops_aot_lib.*"))
11+
# embedding_ops_aot_lib = list(Path(__file__).parent.resolve().glob("**/libembedding_ops_aot_lib.*"))
1112
del Path
12-
assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
13+
assert len(quantized_ops_aot_lib) == 1, f"Expected 1 library but got {len(quantized_ops_aot_lib)}"
14+
# assert len(embedding_ops_aot_lib) == 1, f"Expected 1 library but got {len(embedding_ops_aot_lib)}"
1315
import torch as _torch
1416

15-
_torch.ops.load_library(libs[0])
17+
_torch.ops.load_library(quantized_ops_aot_lib[0])
18+
op = _torch.ops.quantized.add_out  # torch is bound as _torch in this module
19+
assert op is not None
20+
# _torch.ops.load_library(embedding_ops_aot_lib[0])
21+
1622
del _torch
1723
except:
1824
import logging
1925

20-
logging.info("libquantized_ops_aot_lib is not loaded")
26+
logging.info("libquantized_ops_aot_lib and/or libembedding_ops_aot_lib is not loaded")
2127
del logging

0 commit comments

Comments
 (0)