quantized embedding

lucylq · lucylq · commit 2f6470958255 · 2024-08-13T17:24:45.000-07:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -540,10 +540,6 @@ if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
 endif()
 
-if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
-endif()
-
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
 
 #
@@ -574,30 +570,6 @@ cmake_dependent_option(
   EXECUTORCH_BUILD_EXECUTOR_RUNNER "Build the executor_runner executable" ON
   EXECUTORCH_BUILD_HOST_TARGETS OFF
 )
-if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
-  # Baseline libraries that executor_runner will link against.
-  set(_executor_runner_libs executorch gflags)
-
-  if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
-    list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
-  elseif(EXECUTORCH_BUILD_CADENCE)
-    list(APPEND _executor_runner_libs cadence_ops_lib)
-  else()
-    list(APPEND _executor_runner_libs portable_ops_lib)
-  endif()
-
-  # Generate lib to register quantized ops
-  if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
-    list(APPEND _executor_runner_libs quantized_ops_lib)
-  endif()
-
-  add_executable(executor_runner ${_executor_runner__srcs})
-  if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
-    target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
-  endif()
-  target_link_libraries(executor_runner ${_executor_runner_libs})
-  target_compile_options(executor_runner PUBLIC ${_common_compile_options})
-endif()
 
 # Add googletest if any test targets should be built
 if(EXECUTORCH_BUILD_GTESTS)
@@ -698,10 +670,6 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs xnnpack_backend XNNPACK)
   endif()
 
-  if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
-    target_link_options_shared_lib(quantized_ops_lib)
-  endif()
-
   # compile options for pybind
   set(_pybind_compile_options
       -Wno-deprecated-declarations
@@ -787,5 +755,34 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
   )
 endif()
 
+if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
+  target_link_options_shared_lib(quantized_ops_lib)
+endif()
+
+if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
+  # Libraries executor_runner always links against: executorch and gflags.
+  set(_executor_runner_libs executorch gflags)
+
+  if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
+    list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
+  elseif(EXECUTORCH_BUILD_CADENCE)
+    list(APPEND _executor_runner_libs cadence_ops_lib)
+  else()
+    list(APPEND _executor_runner_libs portable_ops_lib)
+  endif()
+
+  # Also link the quantized ops registration library when it is built.
+  if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
+    list(APPEND _executor_runner_libs quantized_ops_lib)
+  endif()
+
+  add_executable(executor_runner ${_executor_runner__srcs})
+  if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
+    target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
+  endif()
+  target_link_libraries(executor_runner ${_executor_runner_libs})
+  target_compile_options(executor_runner PUBLIC ${_common_compile_options})
+endif()
 # Print all summary
 executorch_print_configuration_summary()
diff --git a/build/Codegen.cmake b/build/Codegen.cmake
@@ -150,11 +150,15 @@ function(gen_custom_ops_aot_lib)
   include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 
   target_link_options_shared_lib(${GEN_LIB_NAME})
-  if(EXECUTORCH_BUILD_PYBIND AND APPLE)
-    target_link_libraries(${GEN_LIB_NAME} PRIVATE executorch_no_prim_ops)
-    target_link_options(${GEN_LIB_NAME} PRIVATE -undefined dynamic_lookup)
+  if(TARGET portable_lib)
+    # Link against portable_lib (presumably the pybind library) when it is part
+    # of this build. NOTE(review): confirm portable_lib is not a MODULE
+    # library, which CMake refuses to link against.
+    target_link_libraries(${GEN_LIB_NAME} PRIVATE portable_lib)
   else()
+    # portable_lib is not defined in this configuration; keep linking the
+    # runtime library that was used before this change.
     target_link_libraries(${GEN_LIB_NAME} PRIVATE executorch_no_prim_ops)
   endif()
 endfunction()
 
diff --git a/examples/models/llama2/runner/generation.py b/examples/models/llama2/runner/generation.py
@@ -26,6 +26,12 @@
 
 from executorch.extension.pybindings.portable_lib import _load_for_executorch
 
+# NOTE(review): the imported name is unused (hence noqa); presumably the import
+# is needed for its side effects when the module loads -- confirm.
+from executorch.exir.passes._quant_patterns_and_replacements import (  # noqa
+    quantized_decomposed_lib,
+)
+
 
 class CompletionPrediction(TypedDict, total=False):
     generation: str
diff --git a/install_requirements.sh b/install_requirements.sh
@@ -165,7 +165,7 @@ $PIP_EXECUTABLE install --extra-index-url "${TORCH_NIGHTLY_URL}" \
 # The --extra-index-url may be necessary if pyproject.toml has a dependency on a
 # pre-release or nightly version of a torch package.
 #
-
+CMAKE_ARGS="${CMAKE_ARGS} -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
 EXECUTORCH_BUILD_PYBIND="${EXECUTORCH_BUILD_PYBIND}" \
     CMAKE_ARGS="${CMAKE_ARGS}" \
     CMAKE_BUILD_ARGS="${CMAKE_BUILD_ARGS}" \
diff --git a/kernels/quantized/CMakeLists.txt b/kernels/quantized/CMakeLists.txt
@@ -84,9 +84,17 @@ if(NOT CMAKE_GENERATOR STREQUAL "Xcode"
         ${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp
         ${EXECUTORCH_ROOT}/runtime/core/exec_aten/util/tensor_util_aten.cpp
     )
+
+    # TODO: build a dedicated embedding_ops_aot_lib (pybindings only) for
+    # quantized_decomposed::embedding_4bit.dtype_out and link it into
+    # quantized_ops_aot_lib.
     gen_custom_ops_aot_lib(
       LIB_NAME "quantized_ops_aot_lib" KERNEL_SOURCES "${_quantized_sources}"
     )
+    # Expose the common include directories to the AOT library and its users.
+    target_include_directories(
+      quantized_ops_aot_lib PUBLIC "${_common_include_directories}"
+    )
   endif()
 endif()
 
diff --git a/kernels/quantized/__init__.py b/kernels/quantized/__init__.py
@@ -7,15 +7,23 @@
 try:
     from pathlib import Path
 
-    libs = list(Path(__file__).parent.resolve().glob("**/libquantized_ops_aot_lib.*"))
+    # Find the AOT quantized-ops library anywhere under this package directory.
+    quantized_ops_aot_lib = list(
+        Path(__file__).parent.resolve().glob("**/libquantized_ops_aot_lib.*")
+    )
     del Path
-    assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
+    assert (
+        len(quantized_ops_aot_lib) == 1
+    ), f"Expected 1 library but got {len(quantized_ops_aot_lib)}"
     import torch as _torch
 
-    _torch.ops.load_library(libs[0])
+    _torch.ops.load_library(quantized_ops_aot_lib[0])
+    # NOTE(review): `quantized` may also be a built-in PyTorch namespace; confirm
+    # this op is really provided by the library loaded above.
+    assert _torch.ops.quantized.add_out is not None
     del _torch
 except:
     import logging
 
     logging.info("libquantized_ops_aot_lib is not loaded")
     del logging

Original file line number	Diff line number	Diff line change
`@@ -165,7 +165,7 @@ $PIP_EXECUTABLE install --extra-index-url "${TORCH_NIGHTLY_URL}" \`
`165`	`165`	`# The --extra-index-url may be necessary if pyproject.toml has a dependency on a`
`166`	`166`	`# pre-release or nightly version of a torch package.`
`167`	`167`	`#`
`168`		`-`
	`168`	`+CMAKE_ARGS="$CMAKE_ARGS -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"`
`169`	`169`	`EXECUTORCH_BUILD_PYBIND="${EXECUTORCH_BUILD_PYBIND}" \`
`170`	`170`	`CMAKE_ARGS="${CMAKE_ARGS}" \`
`171`	`171`	`CMAKE_BUILD_ARGS="${CMAKE_BUILD_ARGS}" \`