Skip to content

Intermediate output logging enablement - Inspector logging #4293

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sdk/etdump/schema_flatcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ class Value:
class DebugEvent:
chain_index: int
instruction_id: int
delegate_debug_id_int: Optional[int]
delegate_debug_id_str: Optional[str]
debug_entry: Value


Expand Down
2 changes: 2 additions & 0 deletions sdk/etdump/tests/serialize_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ def get_sample_etdump_flatcc() -> flatcc.ETDumpFlatCC:
debug_event=flatcc.DebugEvent(
chain_index=1,
instruction_id=0,
delegate_debug_id_str="56",
delegate_debug_id_int=-1,
debug_entry=flatcc.Value(
val=flatcc.ValueType.TENSOR.value,
tensor=flatcc.Tensor(
Expand Down
18 changes: 15 additions & 3 deletions sdk/inspector/_inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@
class InstructionEventSignature:
instruction_id: int
chain_index: int
delegate_id: Optional[int] = None
delegate_id_str: Optional[str] = None


# Aggregated Runtime Events for a single instruction
Expand Down Expand Up @@ -92,8 +94,12 @@ def gen_from_events(run_events: List[flatcc.Event]) -> List["InstructionEvent"]:

# Get existing InstructionEvent or insert a new one
signature = InstructionEventSignature(
populated_event.instruction_id, populated_event.chain_index
instruction_id=populated_event.instruction_id,
chain_index=populated_event.chain_index,
delegate_id=populated_event.delegate_debug_id_int,
delegate_id_str=populated_event.delegate_debug_id_str,
)

instruction_event = instruction_events.setdefault(
signature, InstructionEvent(signature=signature)
)
Expand All @@ -104,6 +110,7 @@ def gen_from_events(run_events: List[flatcc.Event]) -> List["InstructionEvent"]:
instruction_event.profile_events = []
instruction_event.profile_events.append(populated_event)
elif isinstance(populated_event, DebugEvent):
# Ignore run_output events
if not is_debug_output(populated_event.debug_entry):
if instruction_event.debug_events is None:
instruction_event.debug_events = []
Expand Down Expand Up @@ -139,7 +146,9 @@ def _gen_from_event(event: ProfileEvent) -> "ProfileEventSignature":
# Signature of a DebugEvent
@dataclass(frozen=True, order=True)
class DebugEventSignature:
instruction_id: Optional[int]
instruction_id: Optional[int] = -1
delegate_id: Optional[int] = None
delegate_id_str: Optional[str] = None

@staticmethod
def _gen_from_event(event: DebugEvent) -> "DebugEventSignature":
Expand All @@ -150,7 +159,9 @@ def _gen_from_event(event: DebugEvent) -> "DebugEventSignature":
The Signature will convert these back to the intended None value
"""
return DebugEventSignature(
event.instruction_id if event.instruction_id != -1 else None
event.instruction_id if event.instruction_id != -1 else None,
event.delegate_debug_id_int if event.delegate_debug_id_int != -1 else None,
event.delegate_debug_id_str if event.delegate_debug_id_str != "" else None,
)


Expand Down Expand Up @@ -562,6 +573,7 @@ def _populate_debugging_related_fields(
Fields Updated:
debug_data
"""

debug_data: List[flatcc.Value] = []
for event in events:
if (debug_events := event.debug_events) is None:
Expand Down
228 changes: 226 additions & 2 deletions sdk/inspector/tests/event_blocks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,55 @@ def _gen_sample_profile_event(
end_time=time[1],
)

@staticmethod
def _gen_sample_debug_event(
instruction_id: int,
delegate_debug_id: Optional[Union[int, str]] = None,
) -> flatcc.DebugEvent:
"""
Helper for generating test DebugEvents

Notably:
- delegate_debug_id takes either the str or int representation
"""
delegate_debug_id_int = (
delegate_debug_id if isinstance(delegate_debug_id, int) else -1
)
delegate_debug_id_str = (
delegate_debug_id if isinstance(delegate_debug_id, str) else ""
)

return flatcc.DebugEvent(
chain_index=0,
instruction_id=instruction_id,
delegate_debug_id_int=delegate_debug_id_int,
delegate_debug_id_str=delegate_debug_id_str,
debug_entry=flatcc.Value(
val=flatcc.ValueType.TENSOR.value,
tensor=flatcc.Tensor(
scalar_type=flatcc.ScalarType.INT,
sizes=[1],
strides=[1],
offset=12345,
),
tensor_list=flatcc.TensorList(
[
flatcc.Tensor(
scalar_type=flatcc.ScalarType.INT,
sizes=[1],
strides=[1],
offset=12345,
)
]
),
int_value=flatcc.Int(1),
float_value=flatcc.Float(1.0),
double_value=flatcc.Double(1.0),
bool_value=flatcc.Bool(False),
output=None,
),
)

@staticmethod
def _get_sample_etdump_flatcc() -> flatcc.ETDumpFlatCC:
"""
Expand Down Expand Up @@ -123,17 +172,145 @@ def _get_sample_etdump_flatcc() -> flatcc.ETDumpFlatCC:

return ETDumpFlatCC(version=0, run_data=[run_data_1, run_data_2, run_data_3])

@staticmethod
def _get_sample_etdump_flatcc_inconsistent_debug_data() -> flatcc.ETDumpFlatCC:
debug_event_1 = TestEventBlock._gen_sample_debug_event(
instruction_id=1, delegate_debug_id=100
)
run_data_1 = flatcc.RunData(
name="signature_a",
bundled_input_index=-1,
allocators=[],
events=[
flatcc.Event(
allocation_event=None,
debug_event=debug_event_1,
profile_event=None,
),
],
)

debug_event_2 = TestEventBlock._gen_sample_debug_event(
instruction_id=1, delegate_debug_id=100
)
# Modify this debug event so it's different from debug_event_1
debug_event_2.debug_entry.tensor.sizes = [2] # pyre-ignore
run_data_2 = flatcc.RunData(
name="signature_a",
bundled_input_index=-1,
allocators=[],
events=[
flatcc.Event(
allocation_event=None,
debug_event=debug_event_2,
profile_event=None,
),
],
)
return ETDumpFlatCC(version=0, run_data=[run_data_1, run_data_2])

@staticmethod
def _get_sample_etdump_flatcc_profiling_and_debugging() -> flatcc.ETDumpFlatCC:
"""
Helper for getting a sample ETDumpFlatCC object with 3 RunData:
- run_data_1 has signature_a with (debug_event_1, profile_event_1)
- run_data_2 has the same signature with run_data_1 and same debug event, but different profiling times
- run_data_3 has signature_b with (debug_event_3, profile_event_3) and (not debug event, profile_event_4)
"""
profile_event_1 = TestEventBlock._gen_sample_profile_event(
name="profile_1", instruction_id=1, time=(0, 1), delegate_debug_id=100
)
debug_event_1 = TestEventBlock._gen_sample_debug_event(
instruction_id=1, delegate_debug_id=100
)
run_data_1 = flatcc.RunData(
name="signature_a",
bundled_input_index=-1,
allocators=[],
events=[
flatcc.Event(
allocation_event=None,
debug_event=None,
profile_event=profile_event_1,
),
flatcc.Event(
allocation_event=None,
debug_event=debug_event_1,
profile_event=None,
),
],
)

profile_event_2 = TestEventBlock._gen_sample_profile_event(
name="profile_1", instruction_id=1, time=(2, 4), delegate_debug_id=100
)
debug_event_2 = TestEventBlock._gen_sample_debug_event(
instruction_id=1, delegate_debug_id=100
)
run_data_2 = flatcc.RunData(
name="signature_a",
bundled_input_index=-1,
allocators=[],
events=[
flatcc.Event(
allocation_event=None,
debug_event=None,
profile_event=profile_event_2,
),
flatcc.Event(
allocation_event=None,
debug_event=debug_event_2,
profile_event=None,
),
],
)

profile_event_3 = TestEventBlock._gen_sample_profile_event(
name="profile_3", instruction_id=1, time=(5, 6), delegate_debug_id=100
)
debug_event_3 = TestEventBlock._gen_sample_debug_event(
instruction_id=1, delegate_debug_id=100
)
profile_event_4 = TestEventBlock._gen_sample_profile_event(
name="profile_4", instruction_id=2, time=(7, 8), delegate_debug_id=100
)
run_data_3 = flatcc.RunData(
name="signature_b",
bundled_input_index=-1,
allocators=[],
events=[
flatcc.Event(
allocation_event=None,
debug_event=debug_event_3,
profile_event=None,
),
flatcc.Event(
allocation_event=None,
debug_event=None,
profile_event=profile_event_3,
),
flatcc.Event(
allocation_event=None,
debug_event=None,
profile_event=profile_event_4,
),
],
)

return ETDumpFlatCC(version=0, run_data=[run_data_1, run_data_2, run_data_3])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def test_gen_from_etdump(self) -> None:
"""
Test "e2e" generation of EventBlocks given an ETDump
- Generated via EventBock.gen_from_etdump
- Generated via EventBlock.gen_from_etdump

Specifically it tests for external correctness:
- Correct number of EventBlocks
- Correct number of Events and Raw Data values (iterations)
"""

etdump: ETDumpFlatCC = TestEventBlock._get_sample_etdump_flatcc()
blocks: List[EventBlock] = EventBlock._gen_from_etdump(etdump)

Expand All @@ -147,6 +324,52 @@ def test_gen_from_etdump(self) -> None:
run_counts.add((len(block.events), len(perf_data.raw)))
self.assertSetEqual(run_counts, {(1, 2), (2, 1)})

def test_gen_from_etdump_profiling_and_debugging(self) -> None:
"""
Test "e2e" generation of EventBlocks given an ETDump with both profiling and debugging events
- Generated via EventBlock.gen_from_etdump

Specifically it tests for external correctness:
- Correct number of EventBlocks
- Correct number of raw perf_data and debug_data for each Event
"""
etdump: ETDumpFlatCC = (
TestEventBlock._get_sample_etdump_flatcc_profiling_and_debugging()
)
blocks: List[EventBlock] = EventBlock._gen_from_etdump(etdump)

self.assertEqual(len(blocks), 2, f"Expected 2 runs, got {len(blocks)}")

# One EventBlock should have 1 event with 2 iterations
# and 1 debug data (because we only populate debug data in the first iteration)
self.assertEqual(len(blocks[0].events), 1)
if (perf_data := blocks[0].events[0].perf_data) is not None:
self.assertEqual(len(perf_data.raw), 2)
self.assertEqual(len(blocks[0].events[0].debug_data), 1)

# The other EventBlock should have 2 events with 1 iterations, and only the fist event has debug data
self.assertEqual(len(blocks[1].events), 2)
perf_data = blocks[1].events[0].perf_data
self.assertIsNotNone(perf_data)
self.assertEqual(len(perf_data.raw), 1)

perf_data = blocks[1].events[1].perf_data
self.assertIsNotNone(perf_data)
self.assertEqual(len(perf_data.raw), 1)
self.assertEqual(len(blocks[1].events[0].debug_data), 1)
self.assertEqual(len(blocks[1].events[1].debug_data), 0)

def test_gen_from_etdump_inconsistent_debug_data(self) -> None:
"""
Make sure AssertionError is thrown when intermediate outputs are different across
different iterations of a model run
"""
etdump: ETDumpFlatCC = (
TestEventBlock._get_sample_etdump_flatcc_inconsistent_debug_data()
)
with self.assertRaises(AssertionError):
EventBlock._gen_from_etdump(etdump)

def test_inspector_event_generation(self) -> None:
"""
Test Inspector.Event derivation from various ProfileEvent cases
Expand Down Expand Up @@ -187,7 +410,8 @@ def _test_profile_event_generation(
self.assertEqual(profile_signature, expected_signature)

event_signature = EventSignature(
instruction_id=instruction_id, profile_event_signature=profile_signature
instruction_id=instruction_id,
profile_event_signature=profile_signature,
)

# Test Event Generation
Expand Down
8 changes: 8 additions & 0 deletions sdk/inspector/tests/inspector_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ def test_populate_debugging_related_fields_raises_for_inconsistent_events(self):
debug_event_0 = flatcc.DebugEvent(
chain_index=1,
instruction_id=0,
delegate_debug_id_int=1,
delegate_debug_id_str=None,
debug_entry=flatcc.Value(
val=flatcc.ValueType.TENSOR.value,
tensor=flatcc.Tensor(
Expand All @@ -304,6 +306,8 @@ def test_populate_debugging_related_fields_raises_for_inconsistent_events(self):
debug_event_1 = flatcc.DebugEvent(
chain_index=1,
instruction_id=0,
delegate_debug_id_int=1,
delegate_debug_id_str=None,
debug_entry=flatcc.Value(
val=flatcc.ValueType.TENSOR.value,
tensor=flatcc.Tensor(
Expand Down Expand Up @@ -346,6 +350,8 @@ def test_populate_debugging_related_fields_passes_for_consistent_events(self):
debug_event_0 = flatcc.DebugEvent(
chain_index=1,
instruction_id=0,
delegate_debug_id_int=1,
delegate_debug_id_str=None,
debug_entry=flatcc.Value(
val=flatcc.ValueType.TENSOR.value,
tensor=flatcc.Tensor(
Expand All @@ -367,6 +373,8 @@ def test_populate_debugging_related_fields_passes_for_consistent_events(self):
debug_event_1 = flatcc.DebugEvent(
chain_index=1,
instruction_id=0,
delegate_debug_id_int=1,
delegate_debug_id_str=None,
debug_entry=flatcc.Value(
val=flatcc.ValueType.TENSOR.value,
tensor=flatcc.Tensor(
Expand Down
2 changes: 2 additions & 0 deletions sdk/inspector/tests/inspector_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def test_find_populated_event(self):
debug_event = flatcc.DebugEvent(
chain_index=1,
instruction_id=0,
delegate_debug_id_str="56",
delegate_debug_id_int=-1,
debug_entry=flatcc.Value(
val=flatcc.ValueType.TENSOR.value,
tensor=flatcc.Tensor(
Expand Down
Loading