@@ -94,7 +94,10 @@ def test_attention_eager(self):
         et_res = self.et_mha(self.x, self.x)  # Self attention.
         tt_res = self.tt_mha(self.x, self.x)  # Self attention.
 
-        self.assertTrue(torch.allclose(et_res, tt_res))
+        self.assertTrue(
+            torch.allclose(et_res, tt_res),
+            msg=f"TorchTune output is not close to ET output.\n\nTorchTune: {tt_res}\nET output: {et_res}",
+        )
 
         # test with kv cache
         self.et_mha.setup_cache(1, dtype=torch.float32, max_seq_len=20)
@@ -136,7 +139,10 @@ def test_attention_export(self):
         )
         et_res = et_mha_ep.module()(self.x, self.x, input_pos=self.input_pos)
         tt_res = self.tt_mha(self.x, self.x, input_pos=self.input_pos)
-        self.assertTrue(torch.allclose(et_res, tt_res))
+        self.assertTrue(
+            torch.allclose(et_res, tt_res),
+            msg=f"TorchTune output is not close to ET output.\n\nTorchTune: {tt_res}\nET output: {et_res}",
+        )
 
         # TODO: KV cache.
 
@@ -162,6 +168,9 @@ def test_attention_executorch(self):
         et_res = method.execute((self.x, self.x, self.input_pos))
         tt_res = self.tt_mha(self.x, self.x, input_pos=self.input_pos)
 
-        self.assertTrue(torch.allclose(et_res[0], tt_res, atol=1e-06))
+        self.assertTrue(
+            torch.allclose(et_res[0], tt_res, atol=1e-05),
+            msg=f"TorchTune output is not close to ET output.\n\nTorchTune: {tt_res}\nET output: {et_res[0]}",
+        )
 
         # TODO: KV cache.
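
Note: each hunk replaces a bare assertTrue(torch.allclose(...)) with a form that passes msg=, so a failing comparison prints both tensors instead of an opaque "False is not true"; the last hunk also relaxes atol from 1e-06 to 1e-05. A minimal standalone sketch of the same pattern follows; the test class, tensor shapes, and values here are illustrative stand-ins, not part of this PR:

import unittest

import torch


class TestAllcloseWithMessage(unittest.TestCase):
    """Illustrative only: mirrors the assertion pattern used in the diff."""

    def test_outputs_match(self):
        # Stand-ins for the TorchTune and ExecuTorch attention outputs.
        tt_res = torch.ones(1, 3)
        et_res = torch.ones(1, 3)

        # On failure, unittest appends msg to the error, so both tensors
        # show up in the test log alongside the assertion failure.
        self.assertTrue(
            torch.allclose(et_res, tt_res, atol=1e-05),
            msg=f"TorchTune output is not close to ET output.\n\nTorchTune: {tt_res}\nET output: {et_res}",
        )


if __name__ == "__main__":
    unittest.main()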