Commit d5d0ad2 (2 parents: 36684e2 + 0cde6b8)

Update base for Update on "Use external_deps for sentencepiece"

As title.

Differential Revision: [D59770172](https://our.internmc.facebook.com/intern/diff/D59770172/)

[ghstack-poisoned]


80 files changed: +131818 / -993 lines

.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion

```diff
@@ -1 +1 @@
-c017c97333dfb9d17f2e5357980241827e50e8d5
+4e39cdceb1414b2d416339866a5bb044fbed4977
```

.ci/scripts/gather_test_models.py
Lines changed: 3 additions & 3 deletions

```diff
@@ -23,7 +23,7 @@
     "w2l": "linux.12xlarge",
     "ic4": "linux.12xlarge",
     "resnet50": "linux.12xlarge",
-    "llava_encoder": "linux.4xlarge",
+    "llava": "linux.4xlarge",
     # This one causes timeout on smaller runner, the root cause is unclear (T161064121)
     "dl3": "linux.12xlarge",
     "emformer_join": "linux.12xlarge",
@@ -83,7 +83,7 @@ def model_should_run_on_event(model: str, event: str) -> bool:
     We put higher priority and fast models to pull request and rest to push.
     """
     if event == "pull_request":
-        return model in ["add", "ic3", "mv2", "mv3", "resnet18", "vit", "llava_encoder"]
+        return model in ["add", "ic3", "mv2", "mv3", "resnet18", "vit", "llava"]
     return True
 
 
@@ -93,7 +93,7 @@ def model_should_run_on_target_os(model: str, target_os: str) -> bool:
     For example, a big model can be disabled in macos due to the limited macos resources.
     """
     if target_os == "macos":
-        return model not in ["llava_encoder"]
+        return model not in ["llava"]
     return True
```
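
For context, the two helpers above gate which models a CI job runs for a given event and target OS. A minimal, self-contained sketch of how they combine after the llava_encoder -> llava rename (the combined filter and the sample candidate list are illustrative assumptions, not part of this commit):

```python
def model_should_run_on_event(model: str, event: str) -> bool:
    # Fast/high-priority models run on pull requests; everything runs on push.
    if event == "pull_request":
        return model in ["add", "ic3", "mv2", "mv3", "resnet18", "vit", "llava"]
    return True


def model_should_run_on_target_os(model: str, target_os: str) -> bool:
    # llava is excluded on macOS because of the limited macOS runner resources.
    if target_os == "macos":
        return model not in ["llava"]
    return True


# Hypothetical usage: pick the models for a pull-request job on macOS.
candidates = ["add", "vit", "llava", "dl3"]
selected = [
    m
    for m in candidates
    if model_should_run_on_event(m, "pull_request")
    and model_should_run_on_target_os(m, "macos")
]
print(selected)  # ['add', 'vit']: llava is filtered out on macOS, dl3 on PRs
```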

.ci/scripts/test.sh
Lines changed: 2 additions & 2 deletions

```diff
@@ -67,9 +67,9 @@ test_model() {
     run_portable_executor_runner
     rm "./${MODEL_NAME}.pte"
   fi
-  if [[ "${MODEL_NAME}" == "llava_encoder" ]]; then
+  if [[ "${MODEL_NAME}" == "llava" ]]; then
     # Install requirements for llava
-    bash examples/models/llava_encoder/install_requirements.sh
+    bash examples/models/llava/install_requirements.sh
   fi
   # python3 -m examples.portable.scripts.export --model_name="llama2" should works too
   "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}"
```

.gitmodules
Lines changed: 6 additions & 6 deletions

```diff
@@ -55,15 +55,15 @@
 [submodule "examples/third-party/LLaVA"]
   path = examples/third-party/LLaVA
   url = https://github.com/haotian-liu/LLaVA.git
-[submodule "examples/models/llama2/third-party/re2"]
-  path = examples/models/llama2/third-party/re2
-  url = https://github.com/google/re2.git
-[submodule "examples/models/llama2/third-party/abseil-cpp"]
-  path = examples/models/llama2/third-party/abseil-cpp
-  url = https://github.com/abseil/abseil-cpp.git
 [submodule "third-party/ios-cmake"]
   path = third-party/ios-cmake
   url = https://github.com/leetal/ios-cmake
 [submodule "examples/models/phi-3-mini/third-party/sentencepiece"]
   path = examples/models/phi-3-mini/third-party/sentencepiece
   url = https://github.com/google/sentencepiece.git
+[submodule "extension/llm/third-party/re2"]
+  path = extension/llm/third-party/re2
+  url = https://github.com/google/re2.git
+[submodule "extension/llm/third-party/abseil-cpp"]
+  path = extension/llm/third-party/abseil-cpp
+  url = https://github.com/abseil/abseil-cpp.git
```

.lintrunner.toml
Lines changed: 2 additions & 0 deletions

```diff
@@ -7,6 +7,7 @@ exclude_patterns = [
     'third-party/**',
     '**/third-party/**',
     '.github/scripts/**',
+    'exir/serde/**',
 ]
 command = [
     'python',
@@ -37,6 +38,7 @@ include_patterns = [
 exclude_patterns = [
     'third-party/**',
     '**/third-party/**',
+    'exir/serde/**',
 ]
 command = [
     'python',
```

backends/cadence/CMakeLists.txt
Lines changed: 2 additions & 2 deletions

```diff
@@ -25,5 +25,5 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
 
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/hifi/operators)
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/hifi/kernels)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/reference/operators)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/reference/kernels)
```

backends/cadence/aot/compiler.py
Lines changed: 2 additions & 2 deletions

```diff
@@ -19,7 +19,7 @@
 )
 from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
 from executorch.backends.cadence.aot.quantizer.quantizer import (
-    CadenceGenericQuantizer,
+    CadenceAtenQuantizer,
     CadenceQuantizer,
 )
 from executorch.backends.cadence.aot.utils import model_is_quantized
@@ -64,7 +64,7 @@ def quantize_pt2(
 
     # Get patterns and apply fusion of dq -> op -> q to qop
     patterns = [
-        assert_is_instance(q, CadenceGenericQuantizer).pattern
+        assert_is_instance(q, CadenceAtenQuantizer).pattern
         for q in quantizer.quantizers
     ]
     QuantFusion(patterns)(converted_model)
```

backends/cadence/aot/functions.yaml
Lines changed: 6 additions & 6 deletions

```diff
@@ -107,30 +107,30 @@
   variants: function
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantize_per_tensor_out
+      kernel_name: impl::reference::quantize_per_tensor_out
 
 - func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::dequantize_per_tensor_out
+      kernel_name: impl::reference::dequantize_per_tensor_out
 
 - func: cadence::quantized_conv.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_conv_out
+      kernel_name: impl::reference::quantized_conv_out
 
 - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_layer_norm_out
+      kernel_name: impl::reference::quantized_layer_norm_out
 
 - func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_linear_out
+      kernel_name: impl::reference::quantized_linear_out
 
 - func: cadence::quantized_relu.out(Tensor X, Tensor X_zero_point, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_relu_out
+      kernel_name: impl::reference::quantized_relu_out
```

backends/cadence/aot/quantizer/fusion_pass.py
Lines changed: 4 additions & 10 deletions

```diff
@@ -14,21 +14,19 @@
     BmmPattern,
     Conv1dPattern,
     Conv2dPattern,
-    LayerNormFunctionalPattern,
     LayerNormPattern,
-    LinearFunctionalPattern,
     LinearPattern,
     MatmulPattern,
     ReluPattern,
 )
 from executorch.backends.cadence.aot.quantizer.utils import (
     create_zero_bias_int32,
+    find_sequential_partitions_aten,
     get_conv_args,
     quantize_tensor_multiplier,
 )
 from executorch.exir.pass_base import ExportPass
 from torch import fx
-from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions
 from torch.fx import GraphModule
 from torch.fx.passes.infra.pass_base import PassResult
 from torch.fx.passes.utils.fuser_utils import legalize_graph
@@ -310,7 +308,7 @@ def __init__(self, patterns) -> None:
 
     def call(self, graph_module: fx.GraphModule) -> PassResult:  # noqa: C901
         for pattern in self.patterns:
-            fused_partitions = find_sequential_partitions(
+            fused_partitions = find_sequential_partitions_aten(
                 graph_module,
                 pattern.partition_types(),
             )
@@ -373,9 +371,7 @@ def call(self, graph_module: fx.GraphModule) -> PassResult:  # noqa: C901
                     quant_node,
                     op_node,
                 )
-            elif isinstance(pattern, LinearPattern) or isinstance(
-                pattern, LinearFunctionalPattern
-            ):
+            elif isinstance(pattern, LinearPattern):
                 args, kwargs = get_args_and_kwargs_linear(
                     graph_module,
                     inputs_inputs,
@@ -385,9 +381,7 @@ def call(self, graph_module: fx.GraphModule) -> PassResult:  # noqa: C901
                     bias_inputs,
                     quant_node,
                 )
-            elif isinstance(pattern, LayerNormPattern) or isinstance(
-                pattern, LayerNormFunctionalPattern
-            ):
+            elif isinstance(pattern, LayerNormPattern):
                 args, kwargs = get_args_and_kwargs_layer_norm(
                     graph_module,
                     inputs_inputs,
```

backends/cadence/aot/quantizer/patterns.py
Lines changed: 20 additions & 84 deletions

```diff
@@ -8,7 +8,7 @@
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import Callable, List, Optional, Tuple, Type, Union
+from typing import List, Optional, Tuple, Union
 
 import torch
 from executorch.backends.cadence.aot.quantizer.utils import get_bias_qparams
@@ -47,17 +47,15 @@ class PartitionAnchors:
 
 class QuantizationPattern(ABC):
     @abstractmethod
-    def partition_types(
-        self,
-    ) -> Union[List[Type[torch.nn.Module]], List[Callable[..., torch.Tensor]]]:
+    def partition_types(self) -> list[OpOverload]:
         """
-        List of types to be passed to find_sequential_partitions.
+        List of types to be passed to find_sequential_partitions_aten.
         """
         pass
 
     @abstractmethod
     def get_anchors(
-        self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
+        self, gm: torch.fx.GraphModule, fused_partition: List[fx.GraphModule]
     ) -> Optional[PartitionAnchors]:
         pass
 
@@ -71,8 +69,8 @@ def replacement_op(self) -> OpOverload:
 
 
 class AddmmPattern(QuantizationPattern):
-    def partition_types(self) -> List[Type[torch.nn.Module]]:
-        return [torch.addmm]
+    def partition_types(self) -> List[OpOverload]:
+        return [torch.ops.aten.addmm.default]
 
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
@@ -103,8 +101,8 @@ def replacement_op(self) -> OpOverload:
 
 
 class BmmPattern(QuantizationPattern):
-    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
-        return [torch.bmm]
+    def partition_types(self) -> List[OpOverload]:
+        return [torch.ops.aten.bmm.default]
 
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
@@ -123,8 +121,8 @@ def replacement_op(self) -> OpOverload:
 
 
 class Conv1dPattern(QuantizationPattern):
-    def partition_types(self) -> List[Type[torch.nn.Module]]:
-        return [torch.nn.Conv1d]
+    def partition_types(self) -> List[OpOverload]:
+        return [torch.ops.aten.conv1d.default]
 
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
@@ -161,8 +159,8 @@ def replacement_op(self) -> OpOverload:
 
 
 class Conv2dPattern(QuantizationPattern):
-    def partition_types(self) -> List[Type[torch.nn.Module]]:
-        return [torch.nn.Conv2d]
+    def partition_types(self) -> List[OpOverload]:
+        return [torch.ops.aten.conv2d.default]
 
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
@@ -199,32 +197,8 @@ def replacement_op(self) -> OpOverload:
 
 
 class LayerNormPattern(QuantizationPattern):
-    def partition_types(self) -> List[Type[torch.nn.Module]]:
-        return [torch.nn.LayerNorm]
-
-    def get_anchors(
-        self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
-    ) -> PartitionAnchors:
-        layer_norm_node = fused_partition[0].nodes[-1]
-
-        # Weights and biases are used as fp32 by our kernel, so they are
-        # passed in as others here along with the normalized shape.
-        return PartitionAnchors(
-            inputs=[(layer_norm_node, 0)],
-            weights=[],
-            biases=[],
-            # Ordering: normalized_shape, weights, bias
-            others=[(layer_norm_node, 1), (layer_norm_node, 2), (layer_norm_node, 3)],
-            output=[(layer_norm_node,)],
-        )
-
-    def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_layer_norm.default
-
-
-class LayerNormFunctionalPattern(QuantizationPattern):
-    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
-        return [torch.nn.functional.layer_norm]
+    def partition_types(self) -> List[OpOverload]:
+        return [torch.ops.aten.layer_norm.default]
 
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
@@ -257,8 +231,8 @@ def replacement_op(self) -> OpOverload:
 
 
 class LinearPattern(QuantizationPattern):
-    def partition_types(self) -> List[Type[torch.nn.Module]]:
-        return [torch.nn.Linear]
+    def partition_types(self) -> List[OpOverload]:
+        return [torch.ops.aten.linear.default]
 
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
@@ -294,47 +268,9 @@ def replacement_op(self) -> OpOverload:
         return torch.ops.cadence.quantized_linear.default
 
 
-class LinearFunctionalPattern(QuantizationPattern):
-    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
-        return [torch.nn.functional.linear]
-
-    def get_anchors(
-        self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
-    ) -> PartitionAnchors:
-        linear_node = fused_partition[0].nodes[-1]
-
-        bias_qspec = DerivedQuantizationSpec(
-            derived_from=[
-                (linear_node.args[0], linear_node),
-                (linear_node.args[1], linear_node),
-            ],
-            derive_qparams_fn=get_bias_qparams,
-            dtype=torch.int32,
-            quant_min=-(2**31),
-            quant_max=2**31 - 1,
-            qscheme=torch.per_tensor_affine,
-        )
-
-        # Keep bias empty if not supplied
-        bias = []
-        if len(linear_node.args) > 2 and linear_node.args[2] is not None:
-            bias = [(linear_node, 2, bias_qspec)]
-
-        return PartitionAnchors(
-            inputs=[(linear_node, 0)],
-            weights=[(linear_node, 1)],
-            # pyre-fixme[6]: Incompatible parameter type
-            biases=bias,
-            output=[(linear_node,)],
-        )
-
-    def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_linear.default
-
-
 class MatmulPattern(QuantizationPattern):
-    def partition_types(self) -> List[Callable[..., torch.Tensor]]:
-        return [torch.matmul]
+    def partition_types(self) -> List[OpOverload]:
+        return [torch.ops.aten.matmul.default]
 
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
@@ -353,8 +289,8 @@ def replacement_op(self) -> OpOverload:
 
 
 class ReluPattern(QuantizationPattern):
-    def partition_types(self) -> List[Type[torch.nn.Module]]:
-        return [torch.nn.ReLU]
+    def partition_types(self) -> List[OpOverload]:
+        return [torch.ops.aten.relu.default]
 
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
```
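
The net effect in this file: every pattern now matches the ATen OpOverloads that appear in the exported graph, rather than `torch.nn` module classes or Python callables, which is what allowed the `*FunctionalPattern` variants to collapse into the base patterns. As a sketch of the revised contract, a hypothetical new pattern might look like this (the `TanhPattern` and the stand-in base class are illustrative so the snippet runs with only `torch` installed; the real base class lives in `executorch.backends.cadence.aot.quantizer.patterns`):

```python
from abc import ABC, abstractmethod
from typing import List

import torch
from torch._ops import OpOverload


class QuantizationPattern(ABC):
    # Stand-in for the real abstract base class in
    # executorch.backends.cadence.aot.quantizer.patterns.
    @abstractmethod
    def partition_types(self) -> List[OpOverload]:
        """List of ATen ops to be passed to find_sequential_partitions_aten."""


class TanhPattern(QuantizationPattern):
    # Hypothetical pattern following the new contract: return ATen
    # OpOverloads (torch.ops.aten.tanh.default) instead of an nn.Module
    # class (torch.nn.Tanh) or a callable (torch.tanh).
    def partition_types(self) -> List[OpOverload]:
        return [torch.ops.aten.tanh.default]


print(TanhPattern().partition_types())  # prints the aten.tanh.default overload
```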
