Rewrite etdump debug data value comparison (#4152)

Olivia-liu · facebook-github-bot · commit 7aa39b44afe5 · 2024-07-08T12:24:11.000-07:00
Summary: Pull Request resolved: #4152 The original comparison actually compares the metadata of the two tensors, not the tensors themselves, and would fail when the two tensors are written at different locations in the buffer (because of different [offsets](https://www.internalfb.com/code/fbsource/[02da17b6e421d91ada2fd690e9f9ecfdb4bedfc1]/fbcode/executorch/sdk/etdump/schema_flatcc.py?lines=26)), even if their values are the same. Therefore, switch to new comparison logic that compares the actual values. Differential Revision: D59350018
diff --git a/sdk/inspector/_inspector.py b/sdk/inspector/_inspector.py
@@ -46,6 +46,7 @@
     gen_graphs_from_etrecord,
     inflate_runtime_output,
     is_debug_output,
+    is_inference_output_equal,
     ProgramOutput,
     RESERVED_FRAMEWORK_EVENT_NAMES,
     TIME_SCALE_DICT,
@@ -571,8 +572,10 @@ def _populate_debugging_related_fields(
                 debug_data = [debug_event.debug_entry for debug_event in debug_events]
             else:
                 for debug_event, value in zip(debug_events, debug_data):
-                    assert (
-                        debug_event.debug_entry == value
+                    v1 = inflate_runtime_output(debug_event.debug_entry, output_buffer)
+                    v2 = inflate_runtime_output(value, output_buffer)
+                    assert is_inference_output_equal(
+                        v1, v2
                     ), """Corresponding debug events in multiple iterations of the model
                     must have the same debug entry values. This is not the case for the
                     intermediate data present in this ETDump and indicates potential issues
diff --git a/sdk/inspector/_inspector_utils.py b/sdk/inspector/_inspector_utils.py
@@ -70,6 +70,26 @@ class TimeScale(Enum):
 ProgramOutput: TypeAlias = List[InferenceOutput]
 
 
+# Compare two InferenceOutputs by value.
+# - Two tensors are equal when torch.equal() says so.
+# - Two lists are equal when they have the same length and every pair of
+#   corresponding tensors is equal.
+# - Any other combination falls back to the == operator.
+def is_inference_output_equal(
+    output1: InferenceOutput, output2: InferenceOutput
+) -> bool:
+    if isinstance(output1, torch.Tensor) and isinstance(output2, torch.Tensor):
+        return torch.equal(output1, output2)
+    if isinstance(output1, List) and isinstance(output2, List):
+        # zip() stops at the shorter list, so a length mismatch must be rejected
+        # explicitly; otherwise a list would compare equal to any longer list
+        # that starts with the same tensors.
+        if len(output1) != len(output2):
+            return False
+        return all(torch.equal(t1, t2) for t1, t2 in zip(output1, output2))
+    return bool(output1 == output2)
+
+
 # Given a ETDump Tensor object and offset, extract into a torch.Tensor
 def _parse_tensor_value(
     tensor: Optional[Tensor], output_buffer: Optional[bytes]
diff --git a/sdk/inspector/tests/inspector_utils_test.py b/sdk/inspector/tests/inspector_utils_test.py
@@ -8,6 +8,8 @@
 import unittest
 from typing import Dict, Tuple
 
+import torch
+
 from executorch.sdk import generate_etrecord, parse_etrecord
 
 from executorch.sdk.debug_format.base_schema import (
@@ -25,6 +27,7 @@
     EDGE_DIALECT_GRAPH_KEY,
     find_populated_event,
     gen_graphs_from_etrecord,
+    is_inference_output_equal,
 )
 
 
@@ -126,6 +129,43 @@ def test_find_populated_event(self):
         )
         self.assertEqual(find_populated_event(event), profile_event)
 
+    def test_is_inference_output_equal(self):
+        """is_inference_output_equal compares tensors and tensor lists by value."""
+        # Compare tensors. Not equal because of different values
+        self.assertFalse(
+            is_inference_output_equal(
+                torch.tensor([[2, 1], [4, 3]]),
+                torch.tensor([[5, 6], [7, 8]]),
+            )
+        )
+
+        # Compare tensors. Equal because of identical values
+        self.assertTrue(
+            is_inference_output_equal(
+                torch.tensor([[1, 2], [3, 4]]),
+                torch.tensor([[1, 2], [3, 4]]),
+            )
+        )
+
+        # Compare tensor lists. Both operands must be plain lists (no trailing
+        # comma that would wrap one of them in a tuple) so that the list branch
+        # is the one being exercised.
+        tensor_list_1 = [
+            torch.tensor([[1, 2], [3, 4]]),
+            torch.tensor([[1, 2], [3, 4]]),
+            torch.tensor([[1, 2], [3, 4]]),
+        ]
+        tensor_list_2 = [
+            torch.tensor([[1, 2], [3, 4]]),
+            torch.tensor([[1, 2], [3, 4]]),
+        ]
+        # Not equal because of different number of tensors
+        self.assertFalse(is_inference_output_equal(tensor_list_1, tensor_list_2))
+        self.assertFalse(is_inference_output_equal(tensor_list_2, tensor_list_1))
+
+        # Equal because of same number of tensors with identical values
+        self.assertTrue(is_inference_output_equal(tensor_list_2, list(tensor_list_2)))
+
 
 def gen_mock_operator_graph_with_expected_map() -> (
     Tuple[OperatorGraph, Dict[int, OperatorNode]]