Fix asserts between PyTorch MPS backend and ExecuTorch MPS delegate (pytorch#16)

DenisVieriu97 · DenisVieriu97 · commit e3730f819c76 · 2024-08-08T19:44:09.000-07:00
* Fix asserts between PyTorch MPS backend and ExecuTorch MPS delegate

* Fix lint
diff --git a/examples/apple/mps/scripts/bench_utils.py b/examples/apple/mps/scripts/bench_utils.py
@@ -5,34 +5,47 @@
 
 import logging
 import time
+import unittest
+from typing import Tuple
 
 import torch
 from torch.export.exported_program import ExportedProgram
 
 
-def assert_outputs_equal(model_output, ref_output):
-    """
-    Helper testing function that asserts that the model output and the reference output
-    are equal with some tolerance. Due to numerical differences between eager mode and
-    the MPS's backend, we relax the detal such that absolute tolerance is 1e-3. and
-    relative tolerance is 1e-3.
-    """
-
-    # Compare the result from executor and eager mode direclty
-    if isinstance(ref_output, tuple) or isinstance(ref_output, list):
-        # Multiple outputs executor always returns tuple, even if there is one output
-        assert len(ref_output) == len(
-            model_output
-        ), "Length of outputs is not matching!"
-        for i in range(len(ref_output)):
-            assert torch.allclose(
-                model_output[i], ref_output[i], atol=1e-03, rtol=1e-03
-            )
-    else:
-        # If one output, eager returns tensor while executor tuple of size 1
-        assert torch.allclose(
-            model_output[0], ref_output, atol=1e-03, rtol=1e-03
-        ), "Outputs are not matching!"
+class TestModule(unittest.TestCase):
+    def assert_outputs_equal(self, model_output, ref_output, use_fp16: bool = False):
+        """
+        Assert that the ExecuTorch outputs match the PyTorch reference outputs.
+
+        Each pair is moved to the CPU and compared through the mean relative error
+        |res - ref| / |ref|, which must stay below 0.05; the bound is loose because
+        eager mode and the MPS backend differ numerically.
+
+        model_output: sequence of tensors (the executor always returns a sequence).
+        ref_output: a tensor, or a tuple/list of tensors, from the eager model.
+        use_fp16: when True, float16 references are cast to float32 before the
+            comparison (ExecuTorch results are in FP32 by default).
+        """
+        if isinstance(ref_output, (tuple, list)):
+            # Multiple outputs: the sequences must pair up one to one.
+            self.assertEqual(
+                len(ref_output), len(model_output), "Length of outputs is not matching!"
+            )
+            expected_outputs = ref_output
+        else:
+            # If one output, eager returns tensor while executor tuple of size 1
+            expected_outputs = (ref_output,)
+
+        for res, ref in zip(model_output, expected_outputs):
+            # Fresh per-pair names: rebinding `ref_output` here would make the next
+            # iteration index into a tensor instead of the reference sequence.
+            res, ref = res.cpu(), ref.cpu()
+            if use_fp16 and ref.dtype == torch.float16:
+                ref = ref.to(torch.float32)
+            # Divide by |ref| so negative references cannot cancel positive errors.
+            mean_err = ((res - ref).abs() / ref.abs()).mean()
+            logging.info(f"mean err = {mean_err}")
+            self.assertLess(mean_err, 0.05)
 
 
 def bench_forward(func, *args):
@@ -101,17 +114,31 @@ def bench_torch(executorch_program: ExportedProgram, model, inputs, model_name):
         )
 
 
-def compare_outputs(executorch_program: ExportedProgram, model, inputs, model_name):
+def compare_outputs(
+    executorch_program: ExportedProgram,
+    model: torch.nn.Module,
+    inputs: Tuple[torch.Tensor, ...],
+    model_name: str,
+    use_fp16: bool,
+):
+    """Check ExecuTorch (MPS delegate) outputs against the eager PyTorch forward pass."""
     inputs_copy = []
+    if use_fp16:
+        model = model.to(torch.float16)
+    # Cast float32 inputs as well so they match the half-precision reference model.
     for t in inputs:
-        inputs_copy.append(t.detach().clone())
+        tensor = t.detach().clone()
+        if use_fp16 and tensor.dtype == torch.float32:
+            tensor = tensor.to(torch.float16)
+        inputs_copy.append(tensor)
     inputs_copy = tuple(inputs_copy)
 
-    pytorch_results = model(*inputs)
+    pytorch_results = model(*inputs_copy)
+    # PyTorch ran on the (possibly fp16) copies; ExecuTorch gets the original inputs.
     executorch_model = get_executorch_model(executorch_program)
     if executorch_model is not None:
-        executorch_results = executorch_model.forward(inputs_copy)
-        assert_outputs_equal(executorch_results, pytorch_results)
+        executorch_results = executorch_model.forward(inputs)
+        TestModule().assert_outputs_equal(executorch_results, pytorch_results, use_fp16)
         logging.info(
             f"Results between ExecuTorch forward pass with MPS backend and PyTorch forward pass for {model_name} are matching!"
         )
diff --git a/examples/apple/mps/scripts/mps_example.py b/examples/apple/mps/scripts/mps_example.py
@@ -155,6 +155,8 @@ def get_model_config(args):
     model, example_inputs, _ = EagerModelFactory.create_model(**model_config)
 
     model = model.eval()
+
+    # Deep copy the model and its inputs to check against the PyTorch forward pass
     if args.check_correctness or args.bench_pytorch:
         model_copy = copy.deepcopy(model)
         inputs_copy = []
@@ -228,4 +230,6 @@ def get_model_config(args):
         bench_torch(executorch_program, model_copy, example_inputs, model_name)
 
     if args.check_correctness:
-        compare_outputs(executorch_program, model_copy, inputs_copy, model_name)
+        compare_outputs(
+            executorch_program, model_copy, inputs_copy, model_name, args.use_fp16
+        )