pytorch
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/requirements-ci.txt
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/requirements-ci.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/cadence/aot/TARGETS
Lines changed: 1 addition & 0 deletions b/‎backends/cadence/aot/TARGETS
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/cadence/aot/compiler.py
Lines changed: 13 additions & 4 deletions b/‎backends/cadence/aot/compiler.py
Lines changed: 13 additions & 4 deletions
diff --git a/‎backends/cadence/aot/export_example.py
Lines changed: 14 additions & 53 deletions b/‎backends/cadence/aot/export_example.py
Lines changed: 14 additions & 53 deletions
diff --git a/‎backends/cadence/aot/utils.py
Lines changed: 36 additions & 1 deletion b/‎backends/cadence/aot/utils.py
Lines changed: 36 additions & 1 deletion
diff --git a/‎backends/qualcomm/aot/wrappers/TensorWrapper.cpp
Lines changed: 3 additions & 1 deletion b/‎backends/qualcomm/aot/wrappers/TensorWrapper.cpp
Lines changed: 3 additions & 1 deletion
diff --git a/‎backends/qualcomm/builders/op_avg_pool2d.py
Lines changed: 2 additions & 2 deletions b/‎backends/qualcomm/builders/op_avg_pool2d.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/qualcomm/passes/convert_to_linear.py
Lines changed: 24 additions & 23 deletions b/‎backends/qualcomm/passes/convert_to_linear.py
Lines changed: 24 additions & 23 deletions
diff --git a/‎backends/qualcomm/passes/expand_broadcast_tensor_shape.py
Lines changed: 58 additions & 0 deletions b/‎backends/qualcomm/passes/expand_broadcast_tensor_shape.py
Lines changed: 58 additions & 0 deletions
@@ -1 +1 @@
-4b2970f7cd3cdd56883cacf116a8693862f89db5
+d1b87e26e5c4343f5b56bb1e6f89b479b389bfac
@@ -1,5 +1,5 @@
 mpmath==1.3.0
-numpy==1.21.3; python_version == '3.10'
+numpy==1.22.0; python_version == '3.10'
 numpy==1.23.2; python_version == '3.11'
 numpy; python_version >= '3.12'
 PyYAML==6.0.1
 
@@ -22,6 +22,7 @@ python_library(
     deps = [
         "fbsource//third-party/pypi/tabulate:tabulate",
         "//caffe2:torch",
+        "//executorch/exir:lib",
         "//executorch/exir:memory",
         "//executorch/exir/dialects:lib",
         "//executorch/exir/dialects/edge:lib",
 
@@ -36,6 +36,8 @@
 from torch.export import export
 from torch.export.exported_program import ExportedProgram
 
+from .utils import print_ops_info
+
 
 # Note: this is not meant as a primary API since it can create inconsistencies
 # if the quantizer here is different from the quantizer used to convert. It is
@@ -193,16 +195,17 @@ def export_to_edge(
 
 
 # Export the model and lower it to an EdgeProgramManager (in edge IR), and
-# apply passes specific to Cadence DSP execution.
+# apply passes specific to Cadence DSP execution. Print the op differences
+# between the two graphs, then return the Cadence-lowered program manager.
 def export_to_cadence(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
     dump_graphs: bool = False,
 ) -> EdgeProgramManager:
-    edge_program_manager = export_to_edge(model, inputs)
+    edge_prog_manager = export_to_edge(model, inputs)
 
     # Run a couple required passes for quant/dequant ops
-    cadence_program_manager = edge_program_manager.transform(
+    cadence_prog_manager = edge_prog_manager.transform(
         [
             InitializePipeline(),
             RemoveZeroSizedCatArgsPass(),
@@ -216,4 +219,10 @@ def export_to_cadence(
         ]
     )
 
-    return cadence_program_manager
+    # Print some information to terminal
+    print_ops_info(
+        edge_prog_manager.exported_program().graph_module,
+        cadence_prog_manager.exported_program().graph_module,
+    )
+
+    return cadence_prog_manager
@@ -10,61 +10,26 @@
 import tempfile
 
 from executorch.backends.cadence.aot.ops_registrations import *  # noqa
-import os
 from typing import Any, Tuple
 
 from executorch.backends.cadence.aot.compiler import (
     convert_pt2,
     export_to_cadence,
-    export_to_edge,
-    quantize_pt2,
+    fuse_pt2,
 )
 from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
 from executorch.backends.cadence.runtime import runtime
 from executorch.backends.cadence.runtime.executor import BundledProgramManager
 from executorch.exir import ExecutorchProgramManager
 from torch import nn
 
-from .utils import print_ops_info
+from .utils import save_bpte_program, save_pte_program
 
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)
 
 
-def _save_pte_program(
-    prog: ExecutorchProgramManager, model_name: str, output_dir: str = ""
-) -> None:
-    if model_name.endswith(".pte"):
-        filename = model_name
-    else:
-        filename = os.path.join(output_dir, f"{model_name}.pte")
-
-    try:
-        with open(filename, "wb") as file:
-            prog.write_to_file(file)
-            logging.info(f"Saved exported program to {filename}")
-    except Exception as e:
-        logging.error(f"Error while saving to {filename}: {e}")
-
-
-def _save_bpte_program(
-    buffer: bytes,
-    model_name: str,
-    output_dir: str = "",
-) -> None:
-    if model_name.endswith(".bpte"):
-        filename = model_name
-    else:
-        filename = os.path.join(output_dir, f"{model_name}.bpte")
-    try:
-        with open(filename, "wb") as f:
-            f.write(buffer)
-        logging.info(f"Saved exported program to {filename}")
-    except Exception as e:
-        logging.error(f"Error while saving to {output_dir}: {e}")
-
-
 def export_model(
     model: nn.Module,
     example_inputs: Tuple[Any, ...],
@@ -74,32 +39,28 @@ def export_model(
     working_dir = tempfile.mkdtemp(dir="/tmp")
     logging.debug(f"Created work directory {working_dir}")
 
-    # convert the model (also called in quantize_pt2)
-    converted_model = convert_pt2(model, example_inputs, CadenceQuantizer())
+    # Instantiate the quantizer
+    quantizer = CadenceQuantizer()
 
-    # Get reference outputs from quantized_model
-    ref_outputs = converted_model(*example_inputs)
+    # Convert the model
+    converted_model = convert_pt2(model, example_inputs, quantizer)
 
-    # Quantize the model
-    quantized_model = quantize_pt2(model, example_inputs)
+    # Get reference outputs from converted model
+    ref_outputs = converted_model(*example_inputs)
 
-    # Get edge program (also called in export_to_cadence)
-    edge_prog_manager = export_to_edge(quantized_model, example_inputs)
+    # Quantize the model (note: quantizer needs to be the same as
+    # the one used in convert_pt2)
+    quantized_model = fuse_pt2(converted_model, quantizer)
 
     # Get edge program after Cadence specific passes
     cadence_prog_manager = export_to_cadence(quantized_model, example_inputs)
 
+    # Get executorch program after Cadence specific passes
     exec_prog: ExecutorchProgramManager = cadence_prog_manager.to_executorch()
 
     logging.info("Final exported graph:\n")
     exec_prog.exported_program().graph_module.graph.print_tabular()
 
-    # Print some information to terminal
-    print_ops_info(
-        edge_prog_manager.exported_program().graph_module,
-        cadence_prog_manager.exported_program().graph_module,
-    )
-
     forward_test_data = BundledProgramManager.bundled_program_test_data_gen(
         method="forward", inputs=example_inputs, expected_outputs=ref_outputs
     )
@@ -110,9 +71,9 @@ def export_model(
         forward_test_data,
     )
     # Save the program as pte (default name is CadenceDemoModel.pte)
-    _save_pte_program(exec_prog, file_name, working_dir)
+    save_pte_program(exec_prog, file_name, working_dir)
     # Save the program as bpte (default name is CadenceDemoModel.bpte)
-    _save_bpte_program(buffer, file_name, working_dir)
+    save_bpte_program(buffer, file_name, working_dir)
 
     logging.debug(
         f"Executorch bundled program buffer saved to {file_name} is {len(buffer)} total bytes"
 
@@ -8,10 +8,12 @@
 
 import logging
 import operator
+import os
 from typing import Dict, List, Tuple
 
 import torch
-from executorch.exir import memory
+
+from executorch.exir import ExecutorchProgramManager, memory
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload, EdgeOpOverloadPacket
 from tabulate import tabulate
@@ -185,3 +187,36 @@ def model_gm_has_SDPA(model_gm: torch.fx.GraphModule) -> bool:
             if node.target == torch.ops.aten.scaled_dot_product_attention.default:
                 return True
     return False
+
+
+def save_pte_program(
+    prog: ExecutorchProgramManager, model_name: str, output_dir: str = ""
+) -> None:
+    """Write ``prog`` to a ``.pte`` file and log the outcome.
+
+    Args:
+        prog: program manager whose ``write_to_file`` is called with the
+            opened binary file.
+        model_name: either a path already ending in ".pte" (used verbatim)
+            or a bare name to which ".pte" is appended.
+        output_dir: directory joined with the generated file name; ignored
+            when ``model_name`` already ends in ".pte".
+
+    Any exception raised while opening or writing is logged as an error and
+    not re-raised.
+    """
+    has_extension = model_name.endswith(".pte")
+    filename = (
+        model_name if has_extension else os.path.join(output_dir, f"{model_name}.pte")
+    )
+
+    try:
+        with open(filename, "wb") as out_file:
+            prog.write_to_file(out_file)
+            logging.info(f"Saved exported program to {filename}")
+    except Exception as err:
+        logging.error(f"Error while saving to {filename}: {err}")
+
+
+def save_bpte_program(
+    buffer: bytes,
+    model_name: str,
+    output_dir: str = "",
+) -> None:
+    """Write a serialized bundled-program ``buffer`` to a ``.bpte`` file.
+
+    Args:
+        buffer: raw bytes to write.
+        model_name: either a path already ending in ".bpte" (used verbatim)
+            or a bare name to which ".bpte" is appended.
+        output_dir: directory joined with the generated file name; ignored
+            when ``model_name`` already ends in ".bpte".
+
+    Any exception raised while opening or writing is logged as an error and
+    not re-raised.
+    """
+    if model_name.endswith(".bpte"):
+        filename = model_name
+    else:
+        filename = os.path.join(output_dir, f"{model_name}.bpte")
+    try:
+        with open(filename, "wb") as f:
+            f.write(buffer)
+        logging.info(f"Saved exported program to {filename}")
+    except Exception as e:
+        # Report the resolved path: output_dir is not where the file goes
+        # when model_name already carries the ".bpte" extension.
+        logging.error(f"Error while saving to {filename}: {e}")
@@ -91,7 +91,9 @@ TensorWrapper::TensorWrapper(
   if (data != nullptr) {
     QNN_VER_PTR(tensor_)->clientBuf.dataSize = bytes;
 
-    if (copy_data) {
+    if (tensor_type != QNN_TENSOR_TYPE_STATIC) {
+      QNN_VER_PTR(tensor_)->clientBuf.data = nullptr;
+    } else if (copy_data) {
       owned_data_ = std::make_unique<char[]>(bytes);
       const char* src_data = static_cast<const char*>(data);
       std::memcpy(owned_data_.get(), src_data, bytes);
 
@@ -51,8 +51,8 @@ def define_node(
             filter_size = filter_size + filter_size
         filter_size_shape = [len(filter_size)]
 
-        # stride info
-        stride = cast(List[int], node.args[2])
+        # stride info - default to kernel_size if not given
+        stride = cast(List[int], node.args[2]) if len(node.args) > 2 else filter_size
         if len(stride) == 1:
             stride = stride + stride
         stride_shape = [len(stride)]
 
@@ -109,49 +109,50 @@ def _convert_to_linear(
 
         # Since QNN has no keep dims for linear op, we will need to add squeeze and unsqueeze around linear node
         # TODO: Find a more general conditional statement.
-        if (
-            fn_node.target == self.add
-            and linear_node.meta["val"].dim() == 3
-            and linear_node.meta["val"].shape[0] == 1
-        ):
-            squeeze_dim = linear_node.meta["val"].shape[1:]
-            linear_node.meta["val"] = torch.squeeze(linear_node.meta["val"], 0)
+        linear_output = linear_node.meta["val"]
+        if linear_output.dim() == 3 and linear_output.shape[0] == 1:
             with gm.graph.inserting_after(input_node):
                 input_users = list(input_node.users.keys())
-                squeeze_dim = linear_node.meta["val"].shape
-                squeeze_view_copy_node = gm.graph.create_node(
+                input_tensor = input_node.meta["val"]
+                squeeze_dim = input_tensor.shape[-2:]
+                squeeze_node = gm.graph.create_node(
                     "call_function",
                     self.view_copy,
                     (
                         input_node,
                         squeeze_dim,
                     ),
                 )
-                squeeze_view_copy_node.meta = linear_node.meta
+                # meta needs to be copied elementwisely for fake-tensor
+                # to be updated correctly and not affect meta of input_node
+                for k, v in input_node.meta.items():
+                    squeeze_node.meta[k] = v
+                squeeze_node.meta["val"] = input_tensor.reshape(squeeze_dim)
                 for user in input_users:
                     if user == linear_node:
-                        user.replace_input_with(input_node, squeeze_view_copy_node)
-            with gm.graph.inserting_after(output):
+                        user.replace_input_with(input_node, squeeze_node)
+
+            with gm.graph.inserting_after(linear_node):
                 output_users = list(linear_node.users.keys())
-                unsqueeze_dim = output.args[0].meta["val"].shape
-                unsqueeze_view_copy_node = gm.graph.create_node(
+                unsqueeze_dim = linear_output.shape
+                unsqueeze_node = gm.graph.create_node(
                     "call_function",
                     self.view_copy,
                     (
                         linear_node,
                         unsqueeze_dim,
                     ),
                 )
-                unsqueeze_view_copy_node.meta = output.args[0].meta
+                # meta needs to be copied elementwisely for fake-tensor
+                # to be updated correctly and not affect meta of unsqueeze_node
+                for k, v in linear_node.meta.items():
+                    unsqueeze_node.meta[k] = v
+                # update linear node's shape
+                linear_node.meta["val"] = linear_output.reshape(
+                    linear_output.shape[-2:]
+                )
                 for user in output_users:
-                    user.replace_input_with(linear_node, unsqueeze_view_copy_node)
-            if QCOM_QUANT_ATTRS in linear_node.meta:
-                squeeze_view_copy_node.meta[QCOM_QUANT_ATTRS] = linear_node.meta[
-                    QCOM_QUANT_ATTRS
-                ]
-                unsqueeze_view_copy_node.meta[QCOM_QUANT_ATTRS] = linear_node.meta[
-                    QCOM_QUANT_ATTRS
-                ]
+                    user.replace_input_with(linear_node, unsqueeze_node)
 
     def _extract_mm_ops(self, partitioned_nodes: List[edge_op]) -> List[torch.fx.Node]:
         mm_node = [n for n in partitioned_nodes if n.target == self.mm][0]
 
@@ -0,0 +1,58 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.passes import dead_code_elimination_pass
+
+
+class ExpandBroadcastTensorShape(ExportPass):
+    """
+    Make tensors have same rank for layout-transform to work properly.
+
+    For each add/sub/mul/div (``.Tensor`` overload) node, every operand whose
+    rank differs from the node's output rank is routed through a
+    ``view_copy`` whose target shape is the operand's shape left-padded
+    with 1s up to the output rank.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Binary ops whose operands are checked for a rank mismatch.
+        self.broadcast_op_targets = [
+            exir_ops.edge.aten.add.Tensor,
+            exir_ops.edge.aten.sub.Tensor,
+            exir_ops.edge.aten.mul.Tensor,
+            exir_ops.edge.aten.div.Tensor,
+        ]
+
+    def traverse_broadcast_node(self, graph_module: torch.fx.GraphModule):
+        """Insert rank-expanding ``view_copy`` nodes ahead of broadcast ops.
+
+        Args:
+            graph_module: graph module whose graph is rewritten in place.
+        """
+        for node in graph_module.graph.nodes:
+            if node.target not in self.broadcast_op_targets:
+                continue
+            output_rank = len(node.meta["val"].shape)
+            for arg in node.args:
+                # Operands that are not graph nodes (e.g. Python scalars)
+                # have no meta/users and need no reshape.
+                if not isinstance(arg, torch.fx.Node):
+                    continue
+                input_shape = arg.meta["val"].shape
+                input_rank = len(input_shape)
+                if input_rank == output_rank:
+                    continue
+                with graph_module.graph.inserting_after(arg):
+                    new_shape = [1] * (output_rank - input_rank) + list(input_shape)
+                    # Snapshot users before creating the reshape node so the
+                    # reshape node itself is not rewired below.
+                    users = list(arg.users.keys())
+                    reshape_node = graph_module.graph.create_node(
+                        "call_function",
+                        exir_ops.edge.aten.view_copy.default,
+                        (arg, tuple(new_shape)),
+                    )
+                    # Copy meta entry by entry instead of aliasing the dict,
+                    # so overriding "val" below does not touch arg's meta.
+                    for k, v in arg.meta.items():
+                        reshape_node.meta[k] = v
+                    reshape_node.meta["val"] = arg.meta["val"].reshape(new_shape)
+                    # NOTE(review): every existing user of arg is rewired,
+                    # not only this broadcast node - confirm that other
+                    # consumers tolerate the expanded rank.
+                    for user in users:
+                        user.replace_input_with(arg, reshape_node)
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        """Run the rewrite, recompile, and eliminate dead code.
+
+        Returns:
+            A ``PassResult`` wrapping ``graph_module``; the second argument
+            is always ``True``.
+        """
+        self.traverse_broadcast_node(graph_module)
+        graph_module.recompile()
+        dead_code_elimination_pass(graph_module)
+        return PassResult(graph_module, True)
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-4b2970f7cd3cdd56883cacf116a8693862f89db5`
	`1`	`+d1b87e26e5c4343f5b56bb1e6f89b479b389bfac`