Skip to content

Commit 4a88318

Browse files
Olivia-liufacebook-github-bot
authored andcommitted
Intermediate output logging enablement - Inspector logging (#4293)
Summary: Pull Request resolved: #4293 This diff parses the logged intermediate outputs in etdump into Inspector objects. It pretty much automatically works because the infra has already been built out for non-delegate intermediate output logging. The only change needed here is to add delegate debug id to `InstructionEventSignature` and `DebugEventSignature` so they group delegated ops correctly. Design doc: https://docs.google.com/document/d/1qGHsgd-roqtxPz4CrUlqGrKaAtbaf9bArziMuwHD0So/edit Reviewed By: Jack-Khuu Differential Revision: D59840296 fbshipit-source-id: 04f22d4520584090f3b37b83386f704cc7a2c271
1 parent 1b0bf1c commit 4a88318

File tree

6 files changed

+255
-5
lines changed

6 files changed

+255
-5
lines changed

sdk/etdump/schema_flatcc.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ class Value:
9595
class DebugEvent:
9696
chain_index: int
9797
instruction_id: int
98+
delegate_debug_id_int: Optional[int]
99+
delegate_debug_id_str: Optional[str]
98100
debug_entry: Value
99101

100102

sdk/etdump/tests/serialize_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ def get_sample_etdump_flatcc() -> flatcc.ETDumpFlatCC:
8585
debug_event=flatcc.DebugEvent(
8686
chain_index=1,
8787
instruction_id=0,
88+
delegate_debug_id_str="56",
89+
delegate_debug_id_int=-1,
8890
debug_entry=flatcc.Value(
8991
val=flatcc.ValueType.TENSOR.value,
9092
tensor=flatcc.Tensor(

sdk/inspector/_inspector.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@
6565
class InstructionEventSignature:
6666
instruction_id: int
6767
chain_index: int
68+
delegate_id: Optional[int] = None
69+
delegate_id_str: Optional[str] = None
6870

6971

7072
# Aggregated Runtime Events for a single instruction
@@ -92,8 +94,12 @@ def gen_from_events(run_events: List[flatcc.Event]) -> List["InstructionEvent"]:
9294

9395
# Get existing InstructionEvent or insert a new one
9496
signature = InstructionEventSignature(
95-
populated_event.instruction_id, populated_event.chain_index
97+
instruction_id=populated_event.instruction_id,
98+
chain_index=populated_event.chain_index,
99+
delegate_id=populated_event.delegate_debug_id_int,
100+
delegate_id_str=populated_event.delegate_debug_id_str,
96101
)
102+
97103
instruction_event = instruction_events.setdefault(
98104
signature, InstructionEvent(signature=signature)
99105
)
@@ -104,6 +110,7 @@ def gen_from_events(run_events: List[flatcc.Event]) -> List["InstructionEvent"]:
104110
instruction_event.profile_events = []
105111
instruction_event.profile_events.append(populated_event)
106112
elif isinstance(populated_event, DebugEvent):
113+
# Ignore run_output events
107114
if not is_debug_output(populated_event.debug_entry):
108115
if instruction_event.debug_events is None:
109116
instruction_event.debug_events = []
@@ -139,7 +146,9 @@ def _gen_from_event(event: ProfileEvent) -> "ProfileEventSignature":
139146
# Signature of a DebugEvent
140147
@dataclass(frozen=True, order=True)
141148
class DebugEventSignature:
142-
instruction_id: Optional[int]
149+
instruction_id: Optional[int] = -1
150+
delegate_id: Optional[int] = None
151+
delegate_id_str: Optional[str] = None
143152

144153
@staticmethod
145154
def _gen_from_event(event: DebugEvent) -> "DebugEventSignature":
@@ -150,7 +159,9 @@ def _gen_from_event(event: DebugEvent) -> "DebugEventSignature":
150159
The Signature will convert these back to the intended None value
151160
"""
152161
return DebugEventSignature(
153-
event.instruction_id if event.instruction_id != -1 else None
162+
event.instruction_id if event.instruction_id != -1 else None,
163+
event.delegate_debug_id_int if event.delegate_debug_id_int != -1 else None,
164+
event.delegate_debug_id_str if event.delegate_debug_id_str != "" else None,
154165
)
155166

156167

@@ -562,6 +573,7 @@ def _populate_debugging_related_fields(
562573
Fields Updated:
563574
debug_data
564575
"""
576+
565577
debug_data: List[flatcc.Value] = []
566578
for event in events:
567579
if (debug_events := event.debug_events) is None:

sdk/inspector/tests/event_blocks_test.py

Lines changed: 226 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,55 @@ def _gen_sample_profile_event(
5858
end_time=time[1],
5959
)
6060

61+
@staticmethod
62+
def _gen_sample_debug_event(
63+
instruction_id: int,
64+
delegate_debug_id: Optional[Union[int, str]] = None,
65+
) -> flatcc.DebugEvent:
66+
"""
67+
Helper for generating test DebugEvents
68+
69+
Notably:
70+
- delegate_debug_id takes either the str or int representation
71+
"""
72+
delegate_debug_id_int = (
73+
delegate_debug_id if isinstance(delegate_debug_id, int) else -1
74+
)
75+
delegate_debug_id_str = (
76+
delegate_debug_id if isinstance(delegate_debug_id, str) else ""
77+
)
78+
79+
return flatcc.DebugEvent(
80+
chain_index=0,
81+
instruction_id=instruction_id,
82+
delegate_debug_id_int=delegate_debug_id_int,
83+
delegate_debug_id_str=delegate_debug_id_str,
84+
debug_entry=flatcc.Value(
85+
val=flatcc.ValueType.TENSOR.value,
86+
tensor=flatcc.Tensor(
87+
scalar_type=flatcc.ScalarType.INT,
88+
sizes=[1],
89+
strides=[1],
90+
offset=12345,
91+
),
92+
tensor_list=flatcc.TensorList(
93+
[
94+
flatcc.Tensor(
95+
scalar_type=flatcc.ScalarType.INT,
96+
sizes=[1],
97+
strides=[1],
98+
offset=12345,
99+
)
100+
]
101+
),
102+
int_value=flatcc.Int(1),
103+
float_value=flatcc.Float(1.0),
104+
double_value=flatcc.Double(1.0),
105+
bool_value=flatcc.Bool(False),
106+
output=None,
107+
),
108+
)
109+
61110
@staticmethod
62111
def _get_sample_etdump_flatcc() -> flatcc.ETDumpFlatCC:
63112
"""
@@ -123,17 +172,145 @@ def _get_sample_etdump_flatcc() -> flatcc.ETDumpFlatCC:
123172

124173
return ETDumpFlatCC(version=0, run_data=[run_data_1, run_data_2, run_data_3])
125174

175+
@staticmethod
176+
def _get_sample_etdump_flatcc_inconsistent_debug_data() -> flatcc.ETDumpFlatCC:
177+
debug_event_1 = TestEventBlock._gen_sample_debug_event(
178+
instruction_id=1, delegate_debug_id=100
179+
)
180+
run_data_1 = flatcc.RunData(
181+
name="signature_a",
182+
bundled_input_index=-1,
183+
allocators=[],
184+
events=[
185+
flatcc.Event(
186+
allocation_event=None,
187+
debug_event=debug_event_1,
188+
profile_event=None,
189+
),
190+
],
191+
)
192+
193+
debug_event_2 = TestEventBlock._gen_sample_debug_event(
194+
instruction_id=1, delegate_debug_id=100
195+
)
196+
# Modify this debug event so it's different from debug_event_1
197+
debug_event_2.debug_entry.tensor.sizes = [2] # pyre-ignore
198+
run_data_2 = flatcc.RunData(
199+
name="signature_a",
200+
bundled_input_index=-1,
201+
allocators=[],
202+
events=[
203+
flatcc.Event(
204+
allocation_event=None,
205+
debug_event=debug_event_2,
206+
profile_event=None,
207+
),
208+
],
209+
)
210+
return ETDumpFlatCC(version=0, run_data=[run_data_1, run_data_2])
211+
212+
@staticmethod
213+
def _get_sample_etdump_flatcc_profiling_and_debugging() -> flatcc.ETDumpFlatCC:
214+
"""
215+
Helper for getting a sample ETDumpFlatCC object with 3 RunData:
216+
- run_data_1 has signature_a with (debug_event_1, profile_event_1)
217+
- run_data_2 has the same signature with run_data_1 and same debug event, but different profiling times
218+
- run_data_3 has signature_b with (debug_event_3, profile_event_3) and (not debug event, profile_event_4)
219+
"""
220+
profile_event_1 = TestEventBlock._gen_sample_profile_event(
221+
name="profile_1", instruction_id=1, time=(0, 1), delegate_debug_id=100
222+
)
223+
debug_event_1 = TestEventBlock._gen_sample_debug_event(
224+
instruction_id=1, delegate_debug_id=100
225+
)
226+
run_data_1 = flatcc.RunData(
227+
name="signature_a",
228+
bundled_input_index=-1,
229+
allocators=[],
230+
events=[
231+
flatcc.Event(
232+
allocation_event=None,
233+
debug_event=None,
234+
profile_event=profile_event_1,
235+
),
236+
flatcc.Event(
237+
allocation_event=None,
238+
debug_event=debug_event_1,
239+
profile_event=None,
240+
),
241+
],
242+
)
243+
244+
profile_event_2 = TestEventBlock._gen_sample_profile_event(
245+
name="profile_1", instruction_id=1, time=(2, 4), delegate_debug_id=100
246+
)
247+
debug_event_2 = TestEventBlock._gen_sample_debug_event(
248+
instruction_id=1, delegate_debug_id=100
249+
)
250+
run_data_2 = flatcc.RunData(
251+
name="signature_a",
252+
bundled_input_index=-1,
253+
allocators=[],
254+
events=[
255+
flatcc.Event(
256+
allocation_event=None,
257+
debug_event=None,
258+
profile_event=profile_event_2,
259+
),
260+
flatcc.Event(
261+
allocation_event=None,
262+
debug_event=debug_event_2,
263+
profile_event=None,
264+
),
265+
],
266+
)
267+
268+
profile_event_3 = TestEventBlock._gen_sample_profile_event(
269+
name="profile_3", instruction_id=1, time=(5, 6), delegate_debug_id=100
270+
)
271+
debug_event_3 = TestEventBlock._gen_sample_debug_event(
272+
instruction_id=1, delegate_debug_id=100
273+
)
274+
profile_event_4 = TestEventBlock._gen_sample_profile_event(
275+
name="profile_4", instruction_id=2, time=(7, 8), delegate_debug_id=100
276+
)
277+
run_data_3 = flatcc.RunData(
278+
name="signature_b",
279+
bundled_input_index=-1,
280+
allocators=[],
281+
events=[
282+
flatcc.Event(
283+
allocation_event=None,
284+
debug_event=debug_event_3,
285+
profile_event=None,
286+
),
287+
flatcc.Event(
288+
allocation_event=None,
289+
debug_event=None,
290+
profile_event=profile_event_3,
291+
),
292+
flatcc.Event(
293+
allocation_event=None,
294+
debug_event=None,
295+
profile_event=profile_event_4,
296+
),
297+
],
298+
)
299+
300+
return ETDumpFlatCC(version=0, run_data=[run_data_1, run_data_2, run_data_3])
301+
126302
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127303

128304
def test_gen_from_etdump(self) -> None:
129305
"""
130306
Test "e2e" generation of EventBlocks given an ETDump
131-
- Generated via EventBock.gen_from_etdump
307+
- Generated via EventBlock.gen_from_etdump
132308
133309
Specifically it tests for external correctness:
134310
- Correct number of EventBlocks
135311
- Correct number of Events and Raw Data values (iterations)
136312
"""
313+
137314
etdump: ETDumpFlatCC = TestEventBlock._get_sample_etdump_flatcc()
138315
blocks: List[EventBlock] = EventBlock._gen_from_etdump(etdump)
139316

@@ -147,6 +324,52 @@ def test_gen_from_etdump(self) -> None:
147324
run_counts.add((len(block.events), len(perf_data.raw)))
148325
self.assertSetEqual(run_counts, {(1, 2), (2, 1)})
149326

327+
def test_gen_from_etdump_profiling_and_debugging(self) -> None:
328+
"""
329+
Test "e2e" generation of EventBlocks given an ETDump with both profiling and debugging events
330+
- Generated via EventBlock.gen_from_etdump
331+
332+
Specifically it tests for external correctness:
333+
- Correct number of EventBlocks
334+
- Correct number of raw perf_data and debug_data for each Event
335+
"""
336+
etdump: ETDumpFlatCC = (
337+
TestEventBlock._get_sample_etdump_flatcc_profiling_and_debugging()
338+
)
339+
blocks: List[EventBlock] = EventBlock._gen_from_etdump(etdump)
340+
341+
self.assertEqual(len(blocks), 2, f"Expected 2 runs, got {len(blocks)}")
342+
343+
# One EventBlock should have 1 event with 2 iterations
344+
# and 1 debug data (because we only populate debug data in the first iteration)
345+
self.assertEqual(len(blocks[0].events), 1)
346+
if (perf_data := blocks[0].events[0].perf_data) is not None:
347+
self.assertEqual(len(perf_data.raw), 2)
348+
self.assertEqual(len(blocks[0].events[0].debug_data), 1)
349+
350+
# The other EventBlock should have 2 events with 1 iterations, and only the fist event has debug data
351+
self.assertEqual(len(blocks[1].events), 2)
352+
perf_data = blocks[1].events[0].perf_data
353+
self.assertIsNotNone(perf_data)
354+
self.assertEqual(len(perf_data.raw), 1)
355+
356+
perf_data = blocks[1].events[1].perf_data
357+
self.assertIsNotNone(perf_data)
358+
self.assertEqual(len(perf_data.raw), 1)
359+
self.assertEqual(len(blocks[1].events[0].debug_data), 1)
360+
self.assertEqual(len(blocks[1].events[1].debug_data), 0)
361+
362+
def test_gen_from_etdump_inconsistent_debug_data(self) -> None:
363+
"""
364+
Make sure AssertionError is thrown when intermediate outputs are different across
365+
different iterations of a model run
366+
"""
367+
etdump: ETDumpFlatCC = (
368+
TestEventBlock._get_sample_etdump_flatcc_inconsistent_debug_data()
369+
)
370+
with self.assertRaises(AssertionError):
371+
EventBlock._gen_from_etdump(etdump)
372+
150373
def test_inspector_event_generation(self) -> None:
151374
"""
152375
Test Inspector.Event derivation from various ProfileEvent cases
@@ -187,7 +410,8 @@ def _test_profile_event_generation(
187410
self.assertEqual(profile_signature, expected_signature)
188411

189412
event_signature = EventSignature(
190-
instruction_id=instruction_id, profile_event_signature=profile_signature
413+
instruction_id=instruction_id,
414+
profile_event_signature=profile_signature,
191415
)
192416

193417
# Test Event Generation

sdk/inspector/tests/inspector_test.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,8 @@ def test_populate_debugging_related_fields_raises_for_inconsistent_events(self):
283283
debug_event_0 = flatcc.DebugEvent(
284284
chain_index=1,
285285
instruction_id=0,
286+
delegate_debug_id_int=1,
287+
delegate_debug_id_str=None,
286288
debug_entry=flatcc.Value(
287289
val=flatcc.ValueType.TENSOR.value,
288290
tensor=flatcc.Tensor(
@@ -304,6 +306,8 @@ def test_populate_debugging_related_fields_raises_for_inconsistent_events(self):
304306
debug_event_1 = flatcc.DebugEvent(
305307
chain_index=1,
306308
instruction_id=0,
309+
delegate_debug_id_int=1,
310+
delegate_debug_id_str=None,
307311
debug_entry=flatcc.Value(
308312
val=flatcc.ValueType.TENSOR.value,
309313
tensor=flatcc.Tensor(
@@ -346,6 +350,8 @@ def test_populate_debugging_related_fields_passes_for_consistent_events(self):
346350
debug_event_0 = flatcc.DebugEvent(
347351
chain_index=1,
348352
instruction_id=0,
353+
delegate_debug_id_int=1,
354+
delegate_debug_id_str=None,
349355
debug_entry=flatcc.Value(
350356
val=flatcc.ValueType.TENSOR.value,
351357
tensor=flatcc.Tensor(
@@ -367,6 +373,8 @@ def test_populate_debugging_related_fields_passes_for_consistent_events(self):
367373
debug_event_1 = flatcc.DebugEvent(
368374
chain_index=1,
369375
instruction_id=0,
376+
delegate_debug_id_int=1,
377+
delegate_debug_id_str=None,
370378
debug_entry=flatcc.Value(
371379
val=flatcc.ValueType.TENSOR.value,
372380
tensor=flatcc.Tensor(

sdk/inspector/tests/inspector_utils_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ def test_find_populated_event(self):
7676
debug_event = flatcc.DebugEvent(
7777
chain_index=1,
7878
instruction_id=0,
79+
delegate_debug_id_str="56",
80+
delegate_debug_id_int=-1,
7981
debug_entry=flatcc.Value(
8082
val=flatcc.ValueType.TENSOR.value,
8183
tensor=flatcc.Tensor(

0 commit comments

Comments
 (0)