Arm backend: Use quant model for bundled PTE when available

digantdesai · digantdesai · commit 7c0c99bc2aa2 · 2025-05-08T22:34:00.000-07:00
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
@@ -759,7 +759,9 @@ def to_edge_no_delegate(exported_program, args, model: torch.nn.Module, example_
             output_name = os.path.join(args.output, output_name)
 
     if args.bundleio:
-        save_bpte_program(exec_prog, original_model, output_name)
+        # Prefer the quantized model so reference outputs reflect quantization effects
+        reference_model = original_model if not model_int8 else model_int8
+        save_bpte_program(exec_prog, reference_model, output_name)
         print(f"Bundle PTE file saved as {output_name}")
     else:
         save_pte_program(exec_prog, output_name)