Commit 5dcba60

Merge branch 'main' into Arm-backend-Bump-cortex-m-size-test-threshold

2 parents: a2868fb + 954f0e9

File tree: 9 files changed, +330 −115 lines

backends/arm/_passes/arm_pass_manager.py (6 additions, 2 deletions)

```diff
@@ -62,7 +62,10 @@
     UnsqueezeScalarPlaceholdersPass,
 )
 
-from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_specification import (
+    TosaLoweringContext,
+    TosaSpecification,
+)
 from executorch.backends.transforms.decompose_sdpa import (
     DecomposeScaledDotProductAttention,
 )
@@ -80,7 +83,8 @@ def __init__(self, tosa_spec: TosaSpecification) -> None:
         super().__init__()
 
     def _transform(self, graph_module: GraphModule):
-        return self(graph_module).graph_module
+        with TosaLoweringContext(self.tosa_spec):
+            return self(graph_module).graph_module
 
     def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
        self.add_pass(FuseQuantizedActivationPass())
```
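
The practical effect of the `_transform` change is that every pass run by the pass manager now executes inside a `TosaLoweringContext`, so a pass can look up the active TOSA spec instead of having it threaded through as an argument. A minimal sketch of what this enables; `hypothetical_pass` and the spec string are illustrative assumptions, and `create_from_string` is assumed to be the module's existing factory method, not something added by this commit:

```python
# Sketch only: code running inside the with-block can fetch the active
# spec via get_context_spec() instead of receiving it as a parameter.
from executorch.backends.arm.tosa_specification import (
    TosaLoweringContext,
    TosaSpecification,
    get_context_spec,
)


def hypothetical_pass() -> bool:
    # Raises RuntimeError if called outside a TosaLoweringContext.
    spec = get_context_spec()
    return spec.support_integer()


# "TOSA-0.80+BI" is an assumed example spec string.
spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
with TosaLoweringContext(spec):
    print(hypothetical_pass())
```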

backends/arm/quantizer/arm_quantizer.py (3 additions, 3 deletions)

```diff
@@ -247,9 +247,9 @@ def set_module_name(
         quantizer.set_module_name("blocks.sub"), it will quantize all supported operator/operator
         patterns in the submodule with this module name with the given `quantization_config`
         """
-        assert (
-            quantization_config is not None
-        ), " quantization_config == None is not supported yet"
+        # Validate that quantization_config is provided
+        if quantization_config is None:
+            raise ValueError("quantization_config == None is not supported yet")
         self.module_name_config[module_name] = quantization_config
         return self
 
```
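
The motivation for swapping `assert` for an explicit `raise` here (and throughout quantization_config.py below) is that assertions are stripped when Python runs with optimizations enabled, so the check would silently vanish. A minimal illustration of the difference, not code from this commit:

```python
# Under `python -O`, __debug__ is False and assert statements are
# compiled away, so set_cfg_assert(None) would return None unchecked.
def set_cfg_assert(cfg):
    assert cfg is not None, "cfg == None is not supported"  # stripped by -O
    return cfg


# The explicit check is enforced in every interpreter mode.
def set_cfg_raise(cfg):
    if cfg is None:
        raise ValueError("cfg == None is not supported")
    return cfg
```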

backends/arm/quantizer/quantization_config.py (26 additions, 14 deletions)

```diff
@@ -29,30 +29,40 @@ def get_input_act_qspec(self) -> QuantizationSpec | None:
         """Returns QuantizationSpec 'input_activation' after asserting that input_activation.qscheme is valid."""
         if self.input_activation is None:
             return None
-        assert self.input_activation.qscheme in [
+        # Validate that input_activation uses a supported qscheme
+        if self.input_activation.qscheme not in [
             torch.per_tensor_affine,
             torch.per_tensor_symmetric,
-        ], f"Unsupported quantization_spec {self.input_activation} for input_activation."
+        ]:
+            raise ValueError(
+                f"Unsupported quantization_spec {self.input_activation} for input_activation."
+            )
         return self.input_activation
 
     def get_output_act_qspec(self) -> QuantizationSpec | None:
         """Returns QuantizationSpec 'output_activation' after asserting that output_activation.qscheme is valid."""
         if self.output_activation is None:
             return None
-        assert self.output_activation.qscheme in [
+        # Validate that output_activation uses a supported qscheme
+        if self.output_activation.qscheme not in [
             torch.per_tensor_affine,
             torch.per_tensor_symmetric,
-        ], f"Unsupported quantization_spec {self.output_activation} for output_activation."
+        ]:
+            raise ValueError(
+                f"Unsupported quantization_spec {self.output_activation} for output_activation."
+            )
         return self.output_activation
 
     def get_weight_qspec(self) -> QuantizationSpec | None:
         """Returns QuantizationSpec 'weight' after asserting that weight.qscheme is valid."""
         if self.weight is None:
             return None
-        assert self.weight.qscheme in [
+        # Validate that weight uses a supported qscheme
+        if self.weight.qscheme not in [
             torch.per_tensor_symmetric,
             torch.per_channel_symmetric,
-        ], f"Unsupported quantization_spec {self.weight} for weight"
+        ]:
+            raise ValueError(f"Unsupported quantization_spec {self.weight} for weight")
         return self.weight
 
     def get_bias_qspec(self, node: torch.fx.Node) -> QuantizationSpec | None:
@@ -61,11 +71,11 @@ def get_bias_qspec(self, node: torch.fx.Node) -> QuantizationSpec | None:
         def _derive_qparams_fn(
             obs_or_fqs: list[ObserverOrFakeQuantize],
         ) -> tuple[torch.Tensor, torch.Tensor]:
-            assert (
-                len(obs_or_fqs) == 2
-            ), "Expecting two obs/fqs, one for activation and one for weight, got: {}".format(
-                len(obs_or_fqs)
-            )
+            # Validate expected number of observers/fake-quantizes
+            if len(obs_or_fqs) != 2:
+                raise ValueError(
+                    f"Expecting two obs/fqs, one for activation and one for weight, got: {len(obs_or_fqs)}"
+                )
             act_obs_or_fq = obs_or_fqs[0]
             weight_obs_or_fq = obs_or_fqs[1]
             act_scale, act_zp = act_obs_or_fq.calculate_qparams()
@@ -94,9 +104,11 @@ def _derive_qparams_fn(
 
         if self.bias is None:
             return None
-        assert (
-            self.bias.dtype == torch.float
-        ), "Only float dtype for bias is supported for bias right now"
+        # Validate that bias dtype is floating-point
+        if self.bias.dtype != torch.float:
+            raise ValueError(
+                "Only float dtype for bias is supported for bias right now"
+            )
         return self.bias
 
     def get_fixed_qspec(
```
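
For context on the `_derive_qparams_fn` hunk: the two observers it validates supply the activation and weight quantization parameters from which the bias parameters are derived. The conventional derivation multiplies the two scales and uses a zero-valued int32 zero-point; the sketch below assumes that convention, since the actual arithmetic sits outside the changed lines:

```python
import torch


# Assumed conventional bias qparam derivation for quantized conv/linear
# (not quoted from this diff): bias scale is the product of the activation
# and weight scales, with a zero-point of 0 at int32 precision.
def derive_bias_qparams(
    act_scale: torch.Tensor, weight_scale: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor]:
    bias_scale = act_scale * weight_scale
    bias_zero_point = torch.zeros_like(bias_scale, dtype=torch.int32)
    return bias_scale, bias_zero_point
```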

backends/arm/runtime/EthosUBackend.cpp (10 additions, 42 deletions)

```diff
@@ -261,24 +261,12 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
           event_tracer,
           "+EthosUBackend::execute()handles.input.permute_CHW_to_HWC()");
       // permuted byte copy CHW to HWC
-      int c, h, w;
-      if (tensor_in.dim() == 4) {
-        c = tensor_in.size(1);
-        h = tensor_in.size(2);
-        w = tensor_in.size(3);
-      } else if (tensor_in.dim() == 5) {
-        c = tensor_in.size(2);
-        h = tensor_in.size(3);
-        w = tensor_in.size(4);
-      } else {
-        ET_LOG(
-            Error,
-            "Unsupported input tensor dimension %d, expected 4 or 5",
-            tensor_in.dim());
-        return Error::InvalidProgram;
-      }
       permute_CHW_to_HWC(
-          tensor_in.mutable_data_ptr<char>(), scratch_addr, c, h, w);
+          tensor_in.mutable_data_ptr<char>(),
+          scratch_addr,
+          tensor_in.size(1),
+          tensor_in.size(2),
+          tensor_in.size(3));
     } else if (both_char or both_int or both_short) {
       EXECUTORCH_PROF_SCOPE(
           event_tracer, "+EthosUBackend::execute()handles.input.memcpy()");
@@ -376,24 +364,12 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
           "+EthosUBackend::execute()handles.output.permute_HWC_to_CHW()");
 
       char* output_address = (char*)output_addr;
-      int c, h, w;
-      if (tensor_out.dim() == 4) {
-        c = tensor_out.size(1);
-        h = tensor_out.size(2);
-        w = tensor_out.size(3);
-      } else if (tensor_out.dim() == 5) {
-        c = tensor_out.size(2);
-        h = tensor_out.size(3);
-        w = tensor_out.size(4);
-      } else {
-        ET_LOG(
-            Error,
-            "Unsupported output tensor dimension %d, expected 4 or 5",
-            tensor_out.dim());
-        return Error::InvalidProgram;
-      }
       permute_HWC_to_CHW(
-          output_address, tensor_out.mutable_data_ptr<char>(), c, h, w);
+          output_address,
+          tensor_out.mutable_data_ptr<char>(),
+          tensor_out.size(1),
+          tensor_out.size(2),
+          tensor_out.size(3));
     } else {
       EXECUTORCH_PROF_SCOPE(
           event_tracer, "+EthosUBackend::execute()handles.output.move()");
@@ -454,14 +430,6 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
       if (permuted_shape) {
         ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
       }
-    } else if (tensor.dim() == 5) {
-      // Same as above, but for 5D tensors.
-      permuted_shape = tensor.size(0) == io->shape[0] &&
-          tensor.size(1) == io->shape[1] && tensor.size(2) == io->shape[4] &&
-          tensor.size(3) == io->shape[2] && tensor.size(4) == io->shape[3];
-      if (permuted_shape) {
-        ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
-      }
     }
     *is_permuted = permuted_shape;
     return Error::Ok;
```
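
The simplification works because the 5D handling is removed along with the matching permuted-shape check in the last hunk, leaving 4D (NCHW) as the only permuted layout, so channel, height, and width always sit at sizes 1, 2, and 3. For readers unfamiliar with the layout shuffle, here is a NumPy sketch of what `permute_CHW_to_HWC` computes (illustrative only; the runtime version is a raw byte copy in C++):

```python
import numpy as np


# (C, H, W) -> (H, W, C): the element at [c, h, w] moves to [h, w, c].
def permute_chw_to_hwc(src: np.ndarray) -> np.ndarray:
    assert src.ndim == 3, "expects a single CHW image plane"
    return np.ascontiguousarray(src.transpose(1, 2, 0))


chw = np.arange(2 * 3 * 4, dtype=np.int8).reshape(2, 3, 4)
hwc = permute_chw_to_hwc(chw)
assert hwc[1, 2, 0] == chw[0, 1, 2]
```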

backends/arm/tosa_specification.py (32 additions, 0 deletions)

```diff
@@ -11,6 +11,7 @@
 # JIT compiler flows.
 #
 
+import contextvars
 import re
 from typing import List
 
@@ -214,3 +215,34 @@ def support_integer(self):
 
     def support_float(self):
         return "FP" in self.profiles
+
+
+class TosaLoweringContext:
+    """
+    A context manager to handle the TOSA specific aspects of the lowering process.
+    For now it only handles the TOSA specification context, but it can be extended
+    to include other policies or configurations.
+    """
+
+    # Define a context variable for the spec
+    tosa_spec_var: contextvars.ContextVar = contextvars.ContextVar("tosa_spec")
+
+    def __init__(self, spec: TosaSpecification):
+        self.spec = spec
+
+    def __enter__(self):
+        # Set the spec in the context variable and store the token for later reset
+        self.token = TosaLoweringContext.tosa_spec_var.set(self.spec)
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Reset the context variable to its previous state
+        TosaLoweringContext.tosa_spec_var.reset(self.token)
+
+
+# A helper function to retrieve the current spec anywhere in your code
+def get_context_spec() -> TosaSpecification:
+    try:
+        return TosaLoweringContext.tosa_spec_var.get()
+    except LookupError:
+        raise RuntimeError("Function must be executed within a TosaLoweringContext")
```
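
The class leans on the stdlib `contextvars` token protocol: `set()` returns a token that remembers the previous value, and `reset(token)` restores it, which makes nested contexts behave like a stack. A standalone sketch of that mechanic (pure stdlib, no executorch imports):

```python
import contextvars

var: contextvars.ContextVar = contextvars.ContextVar("spec")

outer = var.set("outer-spec")
inner = var.set("inner-spec")  # a nested set shadows the outer value
assert var.get() == "inner-spec"

var.reset(inner)  # restores the shadowed value
assert var.get() == "outer-spec"

var.reset(outer)  # back to unset: get() now raises LookupError
try:
    var.get()
except LookupError:
    print("unset, as before the first set()")
```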

backends/qualcomm/targets.bzl (1 addition, 0 deletions)

```diff
@@ -92,4 +92,5 @@ def define_common_targets():
         exported_deps = [
             ":schema",
         ],
+        platforms = [ANDROID],
     )
```

examples/apple/mps/scripts/mps_example.py (1 addition, 1 deletion)

```diff
@@ -145,7 +145,7 @@ def get_model_config(args):
     return model_config
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # noqa: C901
     args = parse_args()
 
     if args.model_name not in MODEL_NAME_TO_MODEL:
```
