[ArmBackend] Minor improvements to model unit tests

freddan80 · freddan80 · commit 103dd7e1023e · 2025-02-13T12:28:46.000+01:00
* Tighten the numerical tolerance for the MobileNetV2 tests and ensure
  randomness by letting the test framework generate the test vectors
* Make sure to calibrate and test each model with different data

Change-Id: I5e345a2ba1fee8272abb498eceda4b829e2b9e72
Signed-off-by: Fredrik Knutsson <fredrik.knutsson@arm.com>
diff --git a/backends/arm/test/models/test_conformer.py b/backends/arm/test/models/test_conformer.py
@@ -18,6 +18,10 @@
 logger.setLevel(logging.INFO)
 
 
+def get_test_inputs(dim, lengths, num_examples):
+    return (torch.rand(num_examples, int(lengths.max()), dim), lengths)
+
+
 class TestConformer(unittest.TestCase):
     """Tests Torchaudio Conformer"""
 
@@ -41,8 +45,9 @@ class TestConformer(unittest.TestCase):
     }
 
     dim = 16
-    lengths = torch.randint(1, 100, (10,), dtype=torch.int32)
-    input_data = torch.rand(10, int(lengths.max()), dim)
+    num_examples = 10
+    lengths = torch.randint(1, 100, (num_examples,), dtype=torch.int32)
+    model_example_inputs = get_test_inputs(dim, lengths, num_examples)
     conformer = Conformer(
         input_dim=dim,
         num_heads=4,
@@ -56,7 +61,7 @@ def test_conformer_tosa_MI(self):
         (
             ArmTester(
                 self.conformer,
-                example_inputs=(self.input_data, self.lengths),
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-0.80+MI"),
             )
             .export()
@@ -66,7 +71,9 @@ def test_conformer_tosa_MI(self):
             .to_executorch()
             # TODO(MLETORCH-632): Fix numerical errors
             .run_method_and_compare_outputs(
-                inputs=(self.input_data, self.lengths), rtol=1, atol=5
+                rtol=1.0,
+                atol=5.0,
+                inputs=get_test_inputs(self.dim, self.lengths, self.num_examples),
             )
         )
 
@@ -75,15 +82,18 @@ def test_conformer_tosa_BI(self):
         (
             ArmTester(
                 self.conformer,
-                example_inputs=(self.input_data, self.lengths),
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-0.80+BI"),
             )
             .quantize()
             .export()
             .to_edge_transform_and_lower()
             .to_executorch()
             .run_method_and_compare_outputs(
-                qtol=1, rtol=1, atol=5, inputs=(self.input_data, self.lengths)
+                qtol=1.0,
+                rtol=1.0,
+                atol=5.0,
+                inputs=get_test_inputs(self.dim, self.lengths, self.num_examples),
             )
         )
 
@@ -92,7 +102,7 @@ def test_conformer_u55_BI(self):
         tester = (
             ArmTester(
                 self.conformer,
-                example_inputs=(self.input_data, self.lengths),
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_u55_compile_spec(),
             )
             .quantize()
@@ -103,15 +113,18 @@ def test_conformer_u55_BI(self):
         )
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(
-                atol=1.0, qtol=1, inputs=(self.input_data, self.lengths)
+                qtol=1.0,
+                rtol=1.0,
+                atol=5.0,
+                inputs=get_test_inputs(self.dim, self.lengths, self.num_examples),
             )
 
     @unittest.expectedFailure  # TODO(MLETORCH-635)
     def test_conformer_u85_BI(self):
         tester = (
             ArmTester(
                 self.conformer,
-                example_inputs=(self.input_data, self.lengths),
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_u85_compile_spec(),
             )
             .quantize()
@@ -122,5 +135,8 @@ def test_conformer_u85_BI(self):
         )
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(
-                atol=1.0, qtol=1, inputs=(self.input_data, self.lengths)
+                qtol=1.0,
+                rtol=1.0,
+                atol=5.0,
+                inputs=get_test_inputs(self.dim, self.lengths, self.num_examples),
             )
diff --git a/backends/arm/test/models/test_dl3_arm.py b/backends/arm/test/models/test_dl3_arm.py
@@ -17,36 +17,38 @@ class TestDl3(unittest.TestCase):
     """Tests DeepLabv3."""
 
     dl3 = deeplab_v3.DeepLabV3ResNet50Model()
-    model_inputs = dl3.get_example_inputs()
+    model_example_inputs = dl3.get_example_inputs()
     dl3 = dl3.get_eager_model()
 
     @unittest.expectedFailure
     def test_dl3_tosa_MI(self):
         (
             ArmTester(
                 self.dl3,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
             )
             .export()
             .to_edge_transform_and_lower()
             .to_executorch()
-            .run_method_and_compare_outputs(self.model_inputs)
+            .run_method_and_compare_outputs(inputs=self.dl3.get_example_inputs())
         )
 
     @unittest.expectedFailure
     def test_dl3_tosa_BI(self):
         (
             ArmTester(
                 self.dl3,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
             )
             .quantize()
             .export()
             .to_edge_transform_and_lower()
             .to_executorch()
-            .run_method_and_compare_outputs(atol=1.0, qtol=1, inputs=self.model_inputs)
+            .run_method_and_compare_outputs(
+                atol=1.0, qtol=1, inputs=self.dl3.get_example_inputs()
+            )
         )
 
     @pytest.mark.slow
@@ -56,7 +58,7 @@ def test_dl3_u55_BI(self):
         tester = (
             ArmTester(
                 self.dl3,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_u55_compile_spec(),
             )
             .quantize()
@@ -67,7 +69,7 @@ def test_dl3_u55_BI(self):
         )
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(
-                atol=1.0, qtol=1, inputs=self.model_inputs
+                atol=1.0, qtol=1, inputs=self.dl3.get_example_inputs()
             )
 
     @pytest.mark.slow
@@ -77,7 +79,7 @@ def test_dl3_u85_BI(self):
         tester = (
             ArmTester(
                 self.dl3,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_u85_compile_spec(),
             )
             .quantize()
@@ -88,5 +90,5 @@ def test_dl3_u85_BI(self):
         )
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(
-                atol=1.0, qtol=1, inputs=self.model_inputs
+                atol=1.0, qtol=1, inputs=self.dl3.get_example_inputs()
             )
diff --git a/backends/arm/test/models/test_lstm_arm.py b/backends/arm/test/models/test_lstm_arm.py
@@ -16,6 +16,13 @@
 from torch.nn.quantizable.modules import rnn
 
 
+def get_test_inputs():
+    return (
+        torch.randn(5, 3, 10),  # input
+        (torch.randn(2, 3, 20), torch.randn(2, 3, 20)),  # (h0, c0)
+    )
+
+
 class TestLSTM(unittest.TestCase):
     """Tests quantizable LSTM module."""
 
@@ -27,46 +34,43 @@ class TestLSTM(unittest.TestCase):
     lstm = rnn.LSTM(10, 20, 2)
     lstm = lstm.eval()
 
-    input_tensor = torch.randn(5, 3, 10)
-    h0 = torch.randn(2, 3, 20)
-    c0 = torch.randn(2, 3, 20)
-
-    model_inputs = (input_tensor, (h0, c0))
+    # Used e.g. for quantization calibration and shape extraction in the tester
+    model_example_inputs = get_test_inputs()
 
     def test_lstm_tosa_MI(self):
         (
             ArmTester(
                 self.lstm,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
             )
             .export()
             .to_edge_transform_and_lower()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
-            .run_method_and_compare_outputs(inputs=self.model_inputs)
+            .run_method_and_compare_outputs(inputs=get_test_inputs())
         )
 
     def test_lstm_tosa_BI(self):
         (
             ArmTester(
                 self.lstm,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
             )
             .quantize()
             .export()
             .to_edge_transform_and_lower()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
-            .run_method_and_compare_outputs(atol=3e-1, qtol=1, inputs=self.model_inputs)
+            .run_method_and_compare_outputs(atol=3e-1, qtol=1, inputs=get_test_inputs())
         )
 
     def test_lstm_u55_BI(self):
         tester = (
             ArmTester(
                 self.lstm,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_u55_compile_spec(),
             )
             .quantize()
@@ -78,14 +82,14 @@ def test_lstm_u55_BI(self):
         )
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(
-                atol=3e-1, qtol=1, inputs=self.model_inputs
+                atol=3e-1, qtol=1, inputs=get_test_inputs()
             )
 
     def test_lstm_u85_BI(self):
         tester = (
             ArmTester(
                 self.lstm,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_u85_compile_spec(),
             )
             .quantize()
@@ -97,5 +101,5 @@ def test_lstm_u85_BI(self):
         )
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(
-                atol=3e-1, qtol=1, inputs=self.model_inputs
+                atol=3e-1, qtol=1, inputs=get_test_inputs()
             )
diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -32,50 +32,37 @@ class TestMobileNetV2(unittest.TestCase):
     normalize = transforms.Normalize(
         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
     )
-    model_inputs = (normalize(torch.randn((1, 3, 224, 224))),)
 
-    all_operators = {
-        "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
-        "executorch_exir_dialects_edge__ops_aten_add_Tensor",
-        "executorch_exir_dialects_edge__ops_aten_permute_copy_default",
-        "executorch_exir_dialects_edge__ops_aten_addmm_default",
-        "executorch_exir_dialects_edge__ops_aten_mean_dim",
-        "executorch_exir_dialects_edge__ops_aten_hardtanh_default",
-        "executorch_exir_dialects_edge__ops_aten_convolution_default",
-    }
-
-    operators_after_quantization = all_operators - {
-        "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
-    }
+    # Used e.g. for quantization calibration and shape extraction in the tester
+    model_example_inputs = (normalize(torch.randn((1, 3, 224, 224))),)
 
     def test_mv2_tosa_MI(self):
         (
             ArmTester(
                 self.mv2,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
             )
             .export()
             .to_edge_transform_and_lower()
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
-            .run_method_and_compare_outputs(inputs=self.model_inputs)
+            .run_method_and_compare_outputs()
         )
 
     def test_mv2_tosa_BI(self):
         (
             ArmTester(
                 self.mv2,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
             )
             .quantize()
             .export()
             .to_edge_transform_and_lower()
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
-            # atol=1.0 is a defensive upper limit
-            # TODO MLETROCH-72
-            # TODO MLETROCH-149
-            .run_method_and_compare_outputs(atol=1.0, qtol=1, inputs=self.model_inputs)
+            .run_method_and_compare_outputs(rtol=0.001, atol=0.2, qtol=1)
         )
 
     @pytest.mark.slow
@@ -84,7 +71,7 @@ def test_mv2_u55_BI(self):
         tester = (
             ArmTester(
                 self.mv2,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_u55_compile_spec(),
             )
             .quantize()
@@ -95,7 +82,9 @@ def test_mv2_u55_BI(self):
         )
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(
-                atol=1.0, qtol=1, inputs=self.model_inputs
+                rtol=0.001,
+                atol=0.2,
+                qtol=1,
             )
 
     @pytest.mark.slow
@@ -104,7 +93,7 @@ def test_mv2_u85_BI(self):
         tester = (
             ArmTester(
                 self.mv2,
-                example_inputs=self.model_inputs,
+                example_inputs=self.model_example_inputs,
                 compile_spec=common.get_u85_compile_spec(),
             )
             .quantize()
@@ -115,5 +104,7 @@ def test_mv2_u85_BI(self):
         )
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(
-                atol=1.0, qtol=1, inputs=self.model_inputs
+                rtol=0.001,
+                atol=0.2,
+                qtol=1,
             )