Skip to content

Commit e5a1da1

Browse files
larryliu0820 authored and facebook-github-bot committed
Delete llama_quantized lib (pytorch#3119)
Summary: Delete the llama_quantized lib, and move embedding_byte.dtype to an exir pass. Reviewed By: manuelcandales, mikekgfb. Differential Revision: D56206703
1 parent 414cd05 commit e5a1da1

File tree

11 files changed

+184
-264
lines changed

11 files changed

+184
-264
lines changed

examples/models/llama2/CMakeLists.txt

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
4444
set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)
4545

4646
include(${EXECUTORCH_ROOT}/build/Utils.cmake)
47-
include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
4847

4948
if(NOT PYTHON_EXECUTABLE)
5049
resolve_python_executable()
@@ -120,25 +119,9 @@ else()
120119
target_link_options_shared_lib(portable_ops_lib)
121120
endif()
122121

123-
# quantized ops yaml file operation
124-
merge_yaml(
125-
FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/ops/quantized.yaml
126-
FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/quantized/quantized.yaml
127-
OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
128-
129-
gen_selected_ops("${CMAKE_CURRENT_BINARY_DIR}/merged.yaml" "" "")
130-
generate_bindings_for_kernels(
131-
FUNCTIONS_YAML ${CMAKE_CURRENT_BINARY_DIR}/merged.yaml)
132-
message("Generated files ${gen_command_sources}")
133-
134-
# quantized_merge_ops_lib: Register quantized op kernels into the runtime
135-
gen_operators_lib(
136-
"quantized_merge_ops_lib"
137-
KERNEL_LIBS quantized_kernels
138-
DEPS executorch)
139-
target_include_directories(quantized_merge_ops_lib PUBLIC ${_common_include_directories})
140-
target_link_options_shared_lib(quantized_merge_ops_lib)
141-
list(APPEND link_libraries quantized_kernels quantized_merge_ops_lib)
122+
# quantized_ops_lib: Register quantized op kernels into the runtime
123+
target_link_options_shared_lib(quantized_ops_lib)
124+
list(APPEND link_libraries quantized_kernels quantized_ops_lib)
142125

143126
if(EXECUTORCH_BUILD_CUSTOM)
144127
target_link_options_shared_lib(custom_ops)

examples/models/llama2/TARGETS

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ runtime.python_library(
4242
"//caffe2:torch",
4343
"//executorch/examples/models:model_base",
4444
"//executorch/examples/models/llama2:llama_transformer",
45-
"//executorch/examples/models/llama2/ops:quantized_aot_lib",
4645
],
4746
)
4847

examples/models/llama2/ops/TARGETS

Lines changed: 0 additions & 5 deletions
This file was deleted.

examples/models/llama2/ops/quantized.yaml

Lines changed: 0 additions & 11 deletions
This file was deleted.

examples/models/llama2/ops/quantized_ops.py

Lines changed: 0 additions & 169 deletions
This file was deleted.

examples/models/llama2/ops/targets.bzl

Lines changed: 0 additions & 50 deletions
This file was deleted.

examples/models/llama2/quantize.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
import torch
1010
import torch.nn as nn
1111
import torch.nn.functional as F
12-
from .ops.quantized_ops import * # noqa
12+
from executorch.exir.passes._quant_patterns_and_replacements import ( # noqa
13+
quantized_decomposed_lib,
14+
)
1315

1416

1517
try:
@@ -377,7 +379,7 @@ def __init__(
377379

378380
@torch.no_grad()
379381
def forward(self, indices: torch.Tensor) -> torch.Tensor:
380-
return torch.ops.llama_quantized.DEPRECATED_DO_NOT_USE_embedding_byte.dtype(
382+
return torch.ops.quantized_decomposed.embedding_byte.dtype(
381383
self.weight, self.scales, None, 0, 0, indices, dtype=self.dtype
382384
)
383385

examples/models/llama2/runner/targets.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ def _get_operator_lib(aten = False):
66
elif runtime.is_oss:
77
return ["//executorch/kernels/portable:generated_lib", "//executorch/examples/models/llama2/custom_ops:custom_ops", "//executorch/examples/models/llama2/ops:generated_lib"]
88
else:
9-
return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/examples/models/llama2/custom_ops:custom_ops", "//executorch/examples/models/llama2/ops:generated_lib"]
9+
return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/examples/models/llama2/custom_ops:custom_ops"]
1010

1111
def define_common_targets():
1212
for aten in (True, False):

0 commit comments

Comments (0)