Skip to content

Commit c7c4007

Browse files
committed
Fix memory profiling for memory.view ops
ATT. Differential Revision: [D68448333](https://our.internmc.facebook.com/intern/diff/D68448333/); ghstack-source-id: 262854266; Pull Request resolved: #7925
1 parent f73b8cf commit c7c4007

File tree

2 files changed

+16
-8
lines changed

2 files changed

+16
-8
lines changed

exir/memory_planning.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -747,6 +747,7 @@ def apply_algo(
747747
storage with tensors in the outer module.
748748
TODO: make these optimizations once we have some baseline working.
749749
"""
750+
750751
specs = update_all_tensors_lifetime(graph_module, graph_signature)
751752
bufsizes: List[int] = algo(
752753
graph_module, alignment, graph_signature, alloc_graph_input, alloc_graph_output

util/activation_memory_profiler.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -53,10 +53,11 @@ def create_tensor_allocation_info(graph: torch.fx.Graph) -> List[MemoryTimeline]
5353
"""
5454
nodes = graph.nodes
5555
memory_timeline: List[Optional[MemoryTimeline]] = [None for _ in range(len(nodes))]
56+
unique_specs = set()
5657
for _, node in enumerate(nodes):
5758
if node.op == "output":
5859
continue
59-
if node.target == memory.alloc:
60+
if node.target == memory.alloc or node.target == memory.view:
6061
continue
6162
tensor_specs = get_node_tensor_specs(node)
6263
if tensor_specs is None:
@@ -65,6 +66,9 @@ def create_tensor_allocation_info(graph: torch.fx.Graph) -> List[MemoryTimeline]
6566
# TODO: Make use of mem_id in the allocation info
6667
if tensor_spec is None or tensor_spec.mem_id is None or tensor_spec.const:
6768
continue
69+
if tensor_spec in unique_specs:
70+
continue
71+
unique_specs.add(tensor_spec)
6872
start, end = tensor_spec.lifetime
6973
size = num_bytes_from_shape_and_dtype(
7074
typing.cast(torch.Size, tensor_spec.shape), tensor_spec.dtype
@@ -75,6 +79,7 @@ def create_tensor_allocation_info(graph: torch.fx.Graph) -> List[MemoryTimeline]
7579
memory_timeline_j = memory_timeline[j]
7680
if memory_timeline_j is None:
7781
memory_timeline_j = MemoryTimeline()
82+
memory_timeline[j] = memory_timeline_j
7883
assert memory_timeline_j
7984
memory_timeline_j.allocations.append(
8085
Allocation(
@@ -106,6 +111,7 @@ def generate_memory_trace(
106111
chrome_trace_filename: str,
107112
enable_memory_offsets: bool = False,
108113
method_name: str = "forward",
114+
ommit_metadata: bool = False,
109115
):
110116
"""
111117
Generate the memory timeline from the given ExecuTorch program.
@@ -151,13 +157,14 @@ def generate_memory_trace(
151157
e["pid"] = int(allocation.memory_id)
152158
e["tid"] = tid
153159
e["args"] = {}
154-
e["args"]["op_name"] = f"{allocation.op_name}"
155-
# ID refers to memory space, typically from 1 to N.
156-
# For CPU, everything is allocated on one "space", other backends may have multiple.
157-
e["args"]["Memory ID"] = allocation.memory_id
158-
e["args"]["fqn"] = f"{allocation.fqn}"
159-
e["args"]["source"] = f"{allocation.file_and_line_num}"
160-
e["args"]["bytes"] = allocation.size_bytes
160+
if not ommit_metadata:
161+
e["args"]["op_name"] = f"{allocation.op_name}"
162+
# ID refers to memory space, typically from 1 to N.
163+
# For CPU, everything is allocated on one "space", other backends may have multiple.
164+
e["args"]["Memory ID"] = allocation.memory_id
165+
e["args"]["fqn"] = f"{allocation.fqn}"
166+
e["args"]["source"] = f"{allocation.file_and_line_num}"
167+
e["args"]["bytes"] = allocation.size_bytes
161168
start_time += allocation_size_kb
162169
trace_events.append(e)
163170
tid += 1

0 commit comments

Comments
 (0)