Skip to content

Commit 561c035

Browse files
Olivia-liu authored and facebook-github-bot committed
Rewrite etdump debug data value comparison (#4152)
Summary: Pull Request resolved: #4152. The original comparison actually compared the metadata of the 2 tensors, not the tensors themselves, and would fail when the 2 tensors were written at different locations in the buffer (because of different [offsets](https://www.internalfb.com/code/fbsource/[02da17b6e421d91ada2fd690e9f9ecfdb4bedfc1]/fbcode/executorch/sdk/etdump/schema_flatcc.py?lines=26)), even if their values were the same. Therefore, change to new comparison logic that compares the actual values. bypass-github-export-checks; bypass-github-pytorch-ci-checks; bypass-github-executorch-ci-checks. Reviewed By: dbort. Differential Revision: D59350018. fbshipit-source-id: 203a202574b4f3f0ff8f2ebb606280acdd57b310
1 parent b10b763 commit 561c035

File tree

4 files changed

+189
-2
lines changed

4 files changed

+189
-2
lines changed

sdk/inspector/_inspector.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
gen_graphs_from_etrecord,
4747
inflate_runtime_output,
4848
is_debug_output,
49+
is_inference_output_equal,
4950
ProgramOutput,
5051
RESERVED_FRAMEWORK_EVENT_NAMES,
5152
TIME_SCALE_DICT,
@@ -571,8 +572,10 @@ def _populate_debugging_related_fields(
571572
debug_data = [debug_event.debug_entry for debug_event in debug_events]
572573
else:
573574
for debug_event, value in zip(debug_events, debug_data):
574-
assert (
575-
debug_event.debug_entry == value
575+
v1 = inflate_runtime_output(debug_event.debug_entry, output_buffer)
576+
v2 = inflate_runtime_output(value, output_buffer)
577+
assert is_inference_output_equal(
578+
v1, v2
576579
), """Corresponding debug events in multiple iterations of the model
577580
must have the same debug entry values. This is not the case for the
578581
intermediate data present in this ETDump and indicates potential issues

sdk/inspector/_inspector_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,20 @@ class TimeScale(Enum):
7070
ProgramOutput: TypeAlias = List[InferenceOutput]
7171

7272

73+
# Compare whether two InferenceOutputs are equal
def is_inference_output_equal(
    output1: InferenceOutput, output2: InferenceOutput
) -> bool:
    """Return True when two inference outputs hold the same value(s).

    Comparison rules:
      * Two tensors are compared with ``torch.equal``.
      * Two lists are equal only when they have the same length and every
        pair of corresponding elements satisfies ``torch.equal``.
      * A tensor or list compared against a value of a different kind is
        never equal.
      * Any other pair of values is compared with ``==``.

    Args:
        output1: First output to compare.
        output2: Second output to compare.

    Returns:
        True if both outputs are of the same kind and hold equal values,
        False otherwise.
    """
    is_tensor1 = isinstance(output1, torch.Tensor)
    is_tensor2 = isinstance(output2, torch.Tensor)
    if is_tensor1 and is_tensor2:
        return torch.equal(output1, output2)

    is_list1 = isinstance(output1, list)
    is_list2 = isinstance(output2, list)
    if is_list1 and is_list2:
        # zip() silently stops at the shorter input, so without the explicit
        # length check a list would compare equal to any longer list that
        # merely starts with the same tensors.
        return len(output1) == len(output2) and all(
            torch.equal(t1, t2) for t1, t2 in zip(output1, output2)
        )

    if is_tensor1 or is_tensor2 or is_list1 or is_list2:
        # Mixed kinds (e.g. tensor vs. scalar, tensor vs. list). Falling back
        # to `==` here could produce an element-wise tensor whose truth value
        # is ambiguous, so report inequality directly.
        return False

    return bool(output1 == output2)
85+
86+
7387
# Given a ETDump Tensor object and offset, extract into a torch.Tensor
7488
def _parse_tensor_value(
7589
tensor: Optional[Tensor], output_buffer: Optional[bytes]

sdk/inspector/tests/inspector_test.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
from executorch.sdk.inspector import _inspector, Event, EventBlock, Inspector, PerfData
2424
from executorch.sdk.inspector._inspector import (
25+
DebugEventSignature,
26+
flatcc,
2527
InstructionEvent,
2628
InstructionEventSignature,
2729
ProfileEventSignature,
@@ -273,6 +275,132 @@ def test_inspector_get_exported_program(self):
273275
)
274276
)
275277

278+
def test_populate_debugging_related_fields_raises_for_inconsistent_events(self):
    """Two instruction events whose tensor debug entries differ in sizes
    are expected to make _populate_debugging_related_fields assert."""

    def make_tensor_debug_event(sizes, offset):
        # Build a DebugEvent carrying a single INT tensor entry; only the
        # tensor sizes and buffer offset vary between the events below.
        tensor = flatcc.Tensor(
            scalar_type=flatcc.ScalarType.INT,
            sizes=sizes,
            strides=[1],
            offset=offset,
        )
        entry = flatcc.Value(
            val=flatcc.ValueType.TENSOR.value,
            tensor=tensor,
            tensor_list=None,
            int_value=None,
            float_value=None,
            double_value=None,
            bool_value=None,
            output=None,
        )
        return flatcc.DebugEvent(
            chain_index=1,
            instruction_id=0,
            debug_entry=entry,
        )

    ret_event: Event = Event(
        name="event",
    )

    # Note the sizes of the second tensor are different from the first one
    debug_events = [
        make_tensor_debug_event(sizes=[2], offset=12345),
        make_tensor_debug_event(sizes=[1], offset=23456),
    ]

    # One instruction event per debug event, all with the same signature values
    events = [
        InstructionEvent(
            signature=InstructionEventSignature(1, 1), debug_events=[debug_event]
        )
        for debug_event in debug_events
    ]

    # Expect AssertionError because 2 tensors have different sizes
    with self.assertRaises(AssertionError):
        Event._populate_debugging_related_fields(
            ret_event=ret_event,
            debug_event_signature=DebugEventSignature(instruction_id=1),
            events=events,
        )
340+
341+
def test_populate_debugging_related_fields_passes_for_consistent_events(self):
    """Two instruction events whose tensor debug entries differ only in
    offset should be accepted when the value comparison reports equality."""

    def make_tensor_debug_event(offset):
        # Build a DebugEvent carrying a single INT tensor entry of sizes [1];
        # only the buffer offset varies between the events below.
        tensor = flatcc.Tensor(
            scalar_type=flatcc.ScalarType.INT,
            sizes=[1],
            strides=[1],
            offset=offset,
        )
        entry = flatcc.Value(
            val=flatcc.ValueType.TENSOR.value,
            tensor=tensor,
            tensor_list=None,
            int_value=None,
            float_value=None,
            double_value=None,
            bool_value=None,
            output=None,
        )
        return flatcc.DebugEvent(
            chain_index=1,
            instruction_id=0,
            debug_entry=entry,
        )

    ret_event: Event = Event(
        name="event",
    )

    # The second event is the same as the first except for offset
    debug_events = [
        make_tensor_debug_event(offset=12345),
        make_tensor_debug_event(offset=23456),
    ]

    # One instruction event per debug event, all with the same signature values
    events = [
        InstructionEvent(
            signature=InstructionEventSignature(1, 1), debug_events=[debug_event]
        )
        for debug_event in debug_events
    ]

    with patch.object(_inspector, "is_inference_output_equal", return_value=True):
        # Expect it runs with no error because is_inference_output_equal() is mocked to return True
        Event._populate_debugging_related_fields(
            ret_event=ret_event,
            debug_event_signature=DebugEventSignature(instruction_id=1),
            events=events,
        )
403+
276404
def _gen_random_float_list(self) -> List[float]:
277405
return [random.uniform(0, 10) for _ in range(RAW_DATA_SIZE)]
278406

sdk/inspector/tests/inspector_utils_test.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import unittest
99
from typing import Dict, Tuple
1010

11+
import torch
12+
1113
from executorch.sdk import generate_etrecord, parse_etrecord
1214

1315
from executorch.sdk.debug_format.base_schema import (
@@ -25,6 +27,7 @@
2527
EDGE_DIALECT_GRAPH_KEY,
2628
find_populated_event,
2729
gen_graphs_from_etrecord,
30+
is_inference_output_equal,
2831
)
2932

3033

@@ -126,6 +129,45 @@ def test_find_populated_event(self):
126129
)
127130
self.assertEqual(find_populated_event(event), profile_event)
128131

132+
def test_is_inference_output_equal_returns_false_for_different_tensor_values(self):
    """Same-shaped tensors with different elements must not compare equal."""
    first = torch.tensor([[2, 1], [4, 3]])
    second = torch.tensor([[5, 6], [7, 8]])
    self.assertFalse(is_inference_output_equal(first, second))
139+
140+
def test_is_inference_output_equal_returns_false_for_different_tensor_lists(self):
    """Lists holding a different number of tensors must not compare equal."""
    # Both operands must be genuine lists. Wrapping the literal in
    # parentheses with a trailing comma would turn it into a 1-tuple
    # containing a list, and the list comparison would never be exercised.
    tensor_list_1 = [
        torch.tensor([[1, 2], [3, 4]]),
        torch.tensor([[1, 2], [3, 4]]),
        torch.tensor([[1, 2], [3, 4]]),
    ]
    tensor_list_2 = [
        torch.tensor([[1, 2], [3, 4]]),
        torch.tensor([[1, 2], [3, 4]]),
    ]
    # Not equal because of different number of tensors
    self.assertFalse(is_inference_output_equal(tensor_list_1, tensor_list_2))
154+
155+
def test_is_inference_output_equal_returns_true_for_same_tensor_values(self):
    """Distinct tensor objects with identical contents compare equal."""
    first = torch.tensor([[2, 1], [4, 3]])
    second = torch.tensor([[2, 1], [4, 3]])
    self.assertTrue(is_inference_output_equal(first, second))
162+
163+
def test_is_inference_output_equal_returns_true_for_same_strs(self):
    """Non-tensor outputs such as identical strings compare equal."""
    outcome = is_inference_output_equal("value_string", "value_string")
    self.assertTrue(outcome)
170+
129171

130172
def gen_mock_operator_graph_with_expected_map() -> (
131173
Tuple[OperatorGraph, Dict[int, OperatorNode]]

0 commit comments

Comments
 (0)