pytorch · larryliu0820 · Apr 18, 2024
@@ -44,7 +44,6 @@ set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
 set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)
 
 include(${EXECUTORCH_ROOT}/build/Utils.cmake)
-include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
 
 if(NOT PYTHON_EXECUTABLE)
   resolve_python_executable()
@@ -120,25 +119,9 @@ else()
   target_link_options_shared_lib(portable_ops_lib)
 endif()
 
-# quantized ops yaml file operation
-merge_yaml(
-  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/ops/quantized.yaml
-  FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/quantized/quantized.yaml
-  OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
-
-gen_selected_ops("${CMAKE_CURRENT_BINARY_DIR}/merged.yaml" "" "")
-generate_bindings_for_kernels(
-    FUNCTIONS_YAML ${CMAKE_CURRENT_BINARY_DIR}/merged.yaml)
-message("Generated files ${gen_command_sources}")
-
-# quantized_merge_ops_lib: Register quantized op kernels into the runtime
-gen_operators_lib(
-  "quantized_merge_ops_lib"
-  KERNEL_LIBS quantized_kernels
-  DEPS executorch)
-target_include_directories(quantized_merge_ops_lib PUBLIC ${_common_include_directories})
-target_link_options_shared_lib(quantized_merge_ops_lib)
-list(APPEND link_libraries quantized_kernels quantized_merge_ops_lib)
+# quantized_ops_lib: Register quantized op kernels into the runtime
+target_link_options_shared_lib(quantized_ops_lib)
+list(APPEND link_libraries quantized_kernels quantized_ops_lib)
 
 if(EXECUTORCH_BUILD_CUSTOM)
   target_link_options_shared_lib(custom_ops)

@@ -42,7 +42,6 @@ runtime.python_library(
         "//caffe2:torch",
         "//executorch/examples/models:model_base",
         "//executorch/examples/models/llama2:llama_transformer",
-        "//executorch/examples/models/llama2/ops:quantized_aot_lib",
     ],
 )
 

@@ -9,7 +9,9 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from .ops.quantized_ops import *  # noqa
+from executorch.exir.passes._quant_patterns_and_replacements import (  # noqa
+    quantized_decomposed_lib,
+)
 
 
 try:
@@ -377,7 +379,7 @@ def __init__(
 
     @torch.no_grad()
     def forward(self, indices: torch.Tensor) -> torch.Tensor:
-        return torch.ops.llama_quantized.DEPRECATED_DO_NOT_USE_embedding_byte.dtype(
+        return torch.ops.quantized_decomposed.embedding_byte.dtype(
             self.weight, self.scales, None, 0, 0, indices, dtype=self.dtype
         )
 

@@ -4,9 +4,9 @@ def _get_operator_lib(aten = False):
     if aten:
         return ["//executorch/kernels/aten:generated_lib"]
     elif runtime.is_oss:
-        return ["//executorch/kernels/portable:generated_lib", "//executorch/examples/models/llama2/custom_ops:custom_ops", "//executorch/examples/models/llama2/ops:generated_lib"]
+        return ["//executorch/kernels/portable:generated_lib", "//executorch/examples/models/llama2/custom_ops:custom_ops"]
     else:
-        return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/examples/models/llama2/custom_ops:custom_ops", "//executorch/examples/models/llama2/ops:generated_lib"]
+        return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/examples/models/llama2/custom_ops:custom_ops"]
 
 def define_common_targets():
     for aten in (True, False):