|
57 | 57 | # edge dialect model (``EdgeProgramManager``), the ExecuTorch dialect model
|
58 | 58 | # (``ExecutorchProgramManager``), and an optional dictionary of additional models
|
59 | 59 | #
|
60 |
| -# In this tutorial, the mobilenet v2 example model is used to demonstrate:: |
61 |
| -# |
62 |
| -# # Imports |
63 |
| -# import copy |
64 |
| -# import torch |
65 |
| -# |
66 |
| -# from executorch.examples.models.mobilenet_v2 import MV2Model |
67 |
| -# from executorch.exir import ( |
68 |
| -# EdgeCompileConfig, |
69 |
| -# EdgeProgramManager, |
70 |
| -# ExecutorchProgramManager, |
71 |
| -# to_edge, |
72 |
| -# ) |
73 |
| -# from executorch.sdk import generate_etrecord |
74 |
| -# from torch.export import export, ExportedProgram |
75 |
| -# |
76 |
| -# # Generate MV2 Model |
77 |
| -# model: torch.nn.Module = MV2Model() |
78 |
| -# |
79 |
| -# aten_model: ExportedProgram = export( |
80 |
| -# model.get_eager_model().eval(), |
81 |
| -# model.get_example_inputs(), |
82 |
| -# ) |
83 |
| -# |
84 |
| -# edge_program_manager: EdgeProgramManager = to_edge(aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True)) |
85 |
| -# edge_program_manager_copy = copy.deepcopy(edge_program_manager) |
86 |
| -# et_program_manager: ExecutorchProgramManager = edge_program_manager_copy.to_executorch() |
87 |
| -# |
88 |
| -# |
89 |
| -# # Generate ETRecord |
90 |
| -# etrecord_path = "etrecord.bin" |
91 |
| -# generate_etrecord(etrecord_path, edge_program_manager, et_program_manager) |
92 |
| -# |
| 60 | +# In this tutorial, the mobilenet v2 example model is used to demonstrate generating an ``ETRecord``. |
| 61 | + |
| 62 | +# Imports |
| 63 | +import copy |
| 64 | + |
| 65 | +import torch |
| 66 | + |
| 67 | +from executorch.examples.models.mobilenet_v2 import MV2Model |
| 68 | +from executorch.exir import ( |
| 69 | + EdgeCompileConfig, |
| 70 | + EdgeProgramManager, |
| 71 | + ExecutorchProgramManager, |
| 72 | + to_edge, |
| 73 | +) |
| 74 | +from executorch.sdk import generate_etrecord |
| 75 | +from torch.export import export, ExportedProgram |
| 76 | + |
| 77 | +# Generate MV2 Model |
| 78 | +model: torch.nn.Module = MV2Model() |
| 79 | + |
| 80 | +aten_model: ExportedProgram = export( |
| 81 | + model.get_eager_model().eval(), |
| 82 | + model.get_example_inputs(), |
| 83 | +) |
| 84 | + |
| 85 | +edge_program_manager: EdgeProgramManager = to_edge( |
| 86 | + aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True) |
| 87 | +) |
| 88 | +edge_program_manager_copy = copy.deepcopy(edge_program_manager) |
| 89 | +et_program_manager: ExecutorchProgramManager = edge_program_manager_copy.to_executorch() |
| 90 | + |
| 91 | + |
| 92 | +# Generate ETRecord |
| 93 | +etrecord_path = "etrecord.bin" |
| 94 | +generate_etrecord(etrecord_path, edge_program_manager, et_program_manager) |
| 95 | + |
| 96 | +###################################################################### |
93 | 97 | # .. warning::
|
94 | 98 | # Users should do a deepcopy of the output of to_edge() and pass in the
|
95 | 99 | # deepcopy to the generate_etrecord API. This is needed because the
|
|
121 | 125 | # Note: An ``ETRecord`` is not required. If an ``ETRecord`` is not provided,
|
122 | 126 | # the Inspector will show runtime results without operator correlation.
|
123 | 127 | #
|
124 |
| -# To visualize all runtime events, call ``print_data_tabular``:: |
125 |
| -# |
126 |
| -# from executorch.sdk import Inspector |
127 |
| -# |
128 |
| -# etdump_path = "etdump.etdp" |
129 |
| -# inspector = Inspector(etdump_path=etdump_path, etrecord_path=etrecord_path) |
130 |
| -# inspector.print_data_tabular() |
131 |
| -# |
| 128 | +# To visualize all runtime events, call Inspector's ``print_data_tabular``. |
132 | 129 |
|
| 130 | +from executorch.sdk import Inspector |
| 131 | + |
| 132 | +etrecord_path = "etrecord.bin"  # the ETRecord written by generate_etrecord, not the ETDump file |
| 133 | +etdump_path = "etdump.etdp" |
| 134 | +inspector = Inspector(etdump_path=etdump_path, etrecord_path=etrecord_path) |
| 135 | +inspector.print_data_tabular() |
133 | 136 |
|
134 | 137 | ######################################################################
|
135 | 138 | # Analyzing with an Inspector
|
|
139 | 142 | # and ``DataFrames``. These mediums give users the ability to perform custom
|
140 | 143 | # analysis about their model performance.
|
141 | 144 | #
|
142 |
| -# Below are examples usages, with both ``EventBlock`` and ``DataFrame`` approaches:: |
143 |
| -# |
144 |
| -# # Set Up |
145 |
| -# |
146 |
| -# import pprint as pp |
147 |
| -# import pandas as pd |
148 |
| -# |
149 |
| -# pd.set_option('display.max_colwidth', None) |
150 |
| -# pd.set_option('display.max_columns', None) |
151 |
| -# |
| 145 | +# Below are example usages, with both ``EventBlock`` and ``DataFrame`` approaches. |
| 146 | + |
| 147 | +# Set Up |
| 148 | +import pprint as pp |
| 149 | +import pandas as pd |
| 150 | + |
| 151 | +pd.set_option("display.max_colwidth", None) |
| 152 | +pd.set_option("display.max_columns", None) |
| 153 | + |
| 154 | +###################################################################### |
152 | 155 | # If a user wants the raw profiling results, they would do something similar to
|
153 |
| -# finding the raw runtime data of an ``addmm.out`` event:: |
154 |
| -# |
155 |
| -# for event_block in inspector.event_blocks: |
156 |
| -# # Via EventBlocks |
157 |
| -# for event in event_block.events: |
158 |
| -# if event.name == 'native_call_addmm.out': |
159 |
| -# print(event.name, event.perf_data.raw) |
160 |
| -# |
161 |
| -# # Via Dataframe |
162 |
| -# df = event_block.to_dataframe() |
163 |
| -# df = df[df.event_name == 'native_call_addmm.out'] |
164 |
| -# print(df[['event_name', 'raw']]) |
165 |
| -# print() |
166 |
| -# |
| 156 | +# finding the raw runtime data of an ``addmm.out`` event. |
| 157 | + |
| 158 | +for event_block in inspector.event_blocks: |
| 159 | + # Via EventBlocks |
| 160 | + for event in event_block.events: |
| 161 | + if event.name == "native_call_addmm.out": |
| 162 | + print(event.name, event.perf_data.raw) |
| 163 | + |
| 164 | + # Via Dataframe |
| 165 | + df = event_block.to_dataframe() |
| 166 | + df = df[df.event_name == "native_call_addmm.out"] |
| 167 | + print(df[["event_name", "raw"]]) |
| 168 | + print() |
| 169 | + |
| 170 | +###################################################################### |
167 | 171 | # If a user wants to trace an operator back to their model code, they would do
|
168 | 172 | # something similar to finding the module hierarchy and stack trace of the
|
169 |
| -# slowest ``convolution.out`` call:: |
170 |
| -# |
171 |
| -# for event_block in inspector.event_blocks: |
172 |
| -# # Via EventBlocks |
173 |
| -# slowest = None |
174 |
| -# for event in event_block.events: |
175 |
| -# if event.name == 'native_call_convolution.out': |
176 |
| -# if slowest is None or event.perf_data.p50 > slowest.perf_data.p50: |
177 |
| -# slowest = event |
178 |
| -# if slowest is not None: |
179 |
| -# print(slowest.name) |
180 |
| -# print() |
181 |
| -# pp.pprint(slowest.stack_traces) |
182 |
| -# print() |
183 |
| -# pp.pprint(slowest.module_hierarchy |
184 |
| -# |
185 |
| -# # Via Dataframe |
186 |
| -# df = event_block.to_dataframe() |
187 |
| -# df = df[df.event_name == 'native_call_convolution.out'] |
188 |
| -# if len(df) > 0: |
189 |
| -# slowest = df.loc[df['p50'].idxmax()] |
190 |
| -# print(slowest.event_name) |
191 |
| -# print() |
192 |
| -# pp.pprint(slowest.stack_traces) |
193 |
| -# print() |
194 |
| -# pp.pprint(slowest.module_hierarchy) |
195 |
| -# |
196 |
| -# If a user wants the total runtime of a module:: |
197 |
| -# |
198 |
| -# print(inspector.find_total_for_module("L__self___features")) |
199 |
| -# print(inspector.find_total_for_module("L__self___features_14")) |
200 |
| -# |
| 173 | +# slowest ``convolution.out`` call. |
| 174 | + |
| 175 | +for event_block in inspector.event_blocks: |
| 176 | + # Via EventBlocks |
| 177 | + slowest = None |
| 178 | + for event in event_block.events: |
| 179 | + if event.name == "native_call_convolution.out": |
| 180 | + if slowest is None or event.perf_data.p50 > slowest.perf_data.p50: |
| 181 | + slowest = event |
| 182 | + if slowest is not None: |
| 183 | + print(slowest.name) |
| 184 | + print() |
| 185 | + pp.pprint(slowest.stack_traces) |
| 186 | + print() |
| 187 | + pp.pprint(slowest.module_hierarchy) |
| 188 | + |
| 189 | + # Via Dataframe |
| 190 | + df = event_block.to_dataframe() |
| 191 | + df = df[df.event_name == "native_call_convolution.out"] |
| 192 | + if len(df) > 0: |
| 193 | + slowest = df.loc[df["p50"].idxmax()] |
| 194 | + print(slowest.event_name) |
| 195 | + print() |
| 196 | + pp.pprint(slowest.stack_traces) |
| 197 | + print() |
| 198 | + pp.pprint(slowest.module_hierarchy) |
| 199 | + |
| 200 | +###################################################################### |
| 201 | +# If a user wants the total runtime of a module, they can use |
| 202 | +# ``find_total_for_module``. |
| 203 | + |
| 204 | +print(inspector.find_total_for_module("L__self___features")) |
| 205 | +print(inspector.find_total_for_module("L__self___features_14")) |
| 206 | + |
| 207 | +###################################################################### |
201 | 208 | # Note: ``find_total_for_module`` is a special first class method of
|
202 | 209 | # `Inspector <../sdk-inspector.html>`__
|
203 |
| -# |
204 | 210 |
|
205 | 211 | ######################################################################
|
206 | 212 | # Conclusion
|
|
0 commit comments