|
57 | 57 | # edge dialect model (``EdgeProgramManager``), the ExecuTorch dialect model
|
58 | 58 | # (``ExecutorchProgramManager``), and an optional dictionary of additional models
|
59 | 59 | #
|
60 |
| -# In this tutorial, the mobilenet v2 example model is used to demonstrate:: |
61 |
| -# |
62 |
| -# # Imports |
63 |
| -# import copy |
64 |
| -# import torch |
65 |
| -# |
66 |
| -# from executorch.examples.models.mobilenet_v2 import MV2Model |
67 |
| -# from executorch.exir import ( |
68 |
| -# EdgeCompileConfig, |
69 |
| -# EdgeProgramManager, |
70 |
| -# ExecutorchProgramManager, |
71 |
| -# to_edge, |
72 |
| -# ) |
73 |
| -# from executorch.sdk import generate_etrecord |
74 |
| -# from torch.export import export, ExportedProgram |
75 |
| -# |
76 |
| -# # Generate MV2 Model |
77 |
| -# model: torch.nn.Module = MV2Model() |
78 |
| -# |
79 |
| -# aten_model: ExportedProgram = export( |
80 |
| -# model.get_eager_model().eval(), |
81 |
| -# model.get_example_inputs(), |
82 |
| -# ) |
83 |
| -# |
84 |
| -# edge_program_manager: EdgeProgramManager = to_edge(aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True)) |
85 |
| -# edge_program_manager_copy = copy.deepcopy(edge_program_manager) |
86 |
| -# et_program_manager: ExecutorchProgramManager = edge_program_manager_copy.to_executorch() |
87 |
| -# |
88 |
| -# |
89 |
| -# # Generate ETRecord |
90 |
| -# etrecord_path = "etrecord.bin" |
91 |
| -# generate_etrecord(etrecord_path, edge_program_manager, et_program_manager) |
92 |
| -# |
| 60 | +# In this tutorial, the mobilenet v2 example model is used to demonstrate how to generate an ``ETRecord``. |
| 61 | + |
| 62 | +# Imports |
| 63 | +import copy |
| 64 | + |
| 65 | +import torch |
| 66 | + |
| 67 | +from executorch.examples.models.mobilenet_v2 import MV2Model |
| 68 | +from executorch.exir import ( |
| 69 | + EdgeCompileConfig, |
| 70 | + EdgeProgramManager, |
| 71 | + ExecutorchProgramManager, |
| 72 | + to_edge, |
| 73 | +) |
| 74 | +from executorch.sdk import generate_etrecord |
| 75 | +from torch.export import export, ExportedProgram |
| 76 | + |
| 77 | +# Generate MV2 Model |
| 78 | +model: torch.nn.Module = MV2Model() |
| 79 | + |
| 80 | +aten_model: ExportedProgram = export( |
| 81 | + model.get_eager_model().eval(), |
| 82 | + model.get_example_inputs(), |
| 83 | +) |
| 84 | + |
| 85 | +edge_program_manager: EdgeProgramManager = to_edge( |
| 86 | + aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True) |
| 87 | +) |
| 88 | +edge_program_manager_copy = copy.deepcopy(edge_program_manager) |
| 89 | +et_program_manager: ExecutorchProgramManager = edge_program_manager_copy.to_executorch() |
| 90 | + |
| 91 | + |
| 92 | +# Generate ETRecord |
| 93 | +etrecord_path = "etrecord.bin" |
| 94 | +generate_etrecord(etrecord_path, edge_program_manager, et_program_manager) |
| 95 | + |
| 96 | +###################################################################### |
93 | 97 | # .. warning::
|
94 | 98 | # Users should do a deepcopy of the output of to_edge() and pass in the
|
95 | 99 | # deepcopy to the generate_etrecord API. This is needed because the
|
|
131 | 135 | # Note: An ``ETRecord`` is not required. If an ``ETRecord`` is not provided,
|
132 | 136 | # the Inspector will show runtime results without operator correlation.
|
133 | 137 | #
|
134 |
| -# To visualize all runtime events, call ``print_data_tabular``:: |
135 |
| -# |
136 |
| -# from executorch.sdk import Inspector |
137 |
| -# |
138 |
| -# etdump_path = "etdump.etdp" |
139 |
| -# inspector = Inspector(etdump_path=etdump_path, etrecord_path=etrecord_path) |
140 |
| -# inspector.print_data_tabular() |
141 |
| -# |
| 138 | +# To visualize all runtime events, call Inspector's ``print_data_tabular``. |
142 | 139 |
|
| 140 | +from executorch.sdk import Inspector |
| 141 | + |
| 142 | +etrecord_path = "etdump.etdp" |
| 143 | +etdump_path = "etdump.etdp" |
| 144 | +inspector = Inspector(etdump_path=etdump_path, etrecord_path=etrecord_path) |
| 145 | +inspector.print_data_tabular() |
143 | 146 |
|
144 | 147 | ######################################################################
|
145 | 148 | # Analyzing with an Inspector
|
|
149 | 152 | # and ``DataFrames``. These mediums give users the ability to perform custom
|
150 | 153 | # analysis about their model performance.
|
151 | 154 | #
|
152 |
| -# Below are examples usages, with both ``EventBlock`` and ``DataFrame`` approaches:: |
153 |
| -# |
154 |
| -# # Set Up |
155 |
| -# |
156 |
| -# import pprint as pp |
157 |
| -# import pandas as pd |
158 |
| -# |
159 |
| -# pd.set_option('display.max_colwidth', None) |
160 |
| -# pd.set_option('display.max_columns', None) |
161 |
| -# |
| 155 | +# Below are example usages, with both ``EventBlock`` and ``DataFrame`` approaches. |
| 156 | + |
| 157 | +# Set Up |
| 158 | +import pprint as pp |
| 159 | +import pandas as pd |
| 160 | + |
| 161 | +pd.set_option("display.max_colwidth", None) |
| 162 | +pd.set_option("display.max_columns", None) |
| 163 | + |
| 164 | +###################################################################### |
162 | 165 | # If a user wants the raw profiling results, they would do something similar to
|
163 |
| -# finding the raw runtime data of an ``addmm.out`` event:: |
164 |
| -# |
165 |
| -# for event_block in inspector.event_blocks: |
166 |
| -# # Via EventBlocks |
167 |
| -# for event in event_block.events: |
168 |
| -# if event.name == 'native_call_addmm.out': |
169 |
| -# print(event.name, event.perf_data.raw) |
170 |
| -# |
171 |
| -# # Via Dataframe |
172 |
| -# df = event_block.to_dataframe() |
173 |
| -# df = df[df.event_name == 'native_call_addmm.out'] |
174 |
| -# print(df[['event_name', 'raw']]) |
175 |
| -# print() |
176 |
| -# |
| 166 | +# finding the raw runtime data of an ``addmm.out`` event. |
| 167 | + |
| 168 | +for event_block in inspector.event_blocks: |
| 169 | + # Via EventBlocks |
| 170 | + for event in event_block.events: |
| 171 | + if event.name == "native_call_addmm.out": |
| 172 | + print(event.name, event.perf_data.raw) |
| 173 | + |
| 174 | + # Via Dataframe |
| 175 | + df = event_block.to_dataframe() |
| 176 | + df = df[df.event_name == "native_call_addmm.out"] |
| 177 | + print(df[["event_name', 'raw"]]) |
| 178 | + print() |
| 179 | + |
| 180 | +###################################################################### |
177 | 181 | # If a user wants to trace an operator back to their model code, they would do
|
178 | 182 | # something similar to finding the module hierarchy and stack trace of the
|
179 |
| -# slowest ``convolution.out`` call:: |
180 |
| -# |
181 |
| -# for event_block in inspector.event_blocks: |
182 |
| -# # Via EventBlocks |
183 |
| -# slowest = None |
184 |
| -# for event in event_block.events: |
185 |
| -# if event.name == 'native_call_convolution.out': |
186 |
| -# if slowest is None or event.perf_data.p50 > slowest.perf_data.p50: |
187 |
| -# slowest = event |
188 |
| -# if slowest is not None: |
189 |
| -# print(slowest.name) |
190 |
| -# print() |
191 |
| -# pp.pprint(slowest.stack_traces) |
192 |
| -# print() |
193 |
| -# pp.pprint(slowest.module_hierarchy |
194 |
| -# |
195 |
| -# # Via Dataframe |
196 |
| -# df = event_block.to_dataframe() |
197 |
| -# df = df[df.event_name == 'native_call_convolution.out'] |
198 |
| -# if len(df) > 0: |
199 |
| -# slowest = df.loc[df['p50'].idxmax()] |
200 |
| -# print(slowest.event_name) |
201 |
| -# print() |
202 |
| -# pp.pprint(slowest.stack_traces) |
203 |
| -# print() |
204 |
| -# pp.pprint(slowest.module_hierarchy) |
205 |
| -# |
206 |
| -# If a user wants the total runtime of a module:: |
207 |
| -# |
208 |
| -# print(inspector.find_total_for_module("L__self___features")) |
209 |
| -# print(inspector.find_total_for_module("L__self___features_14")) |
210 |
| -# |
| 183 | +# slowest ``convolution.out`` call. |
| 184 | + |
| 185 | +for event_block in inspector.event_blocks: |
| 186 | + # Via EventBlocks |
| 187 | + slowest = None |
| 188 | + for event in event_block.events: |
| 189 | + if event.name == "native_call_convolution.out": |
| 190 | + if slowest is None or event.perf_data.p50 > slowest.perf_data.p50: |
| 191 | + slowest = event |
| 192 | + if slowest is not None: |
| 193 | + print(slowest.name) |
| 194 | + print() |
| 195 | + pp.pprint(slowest.stack_traces) |
| 196 | + print() |
| 197 | + pp.pprint(slowest.module_hierarchy) |
| 198 | + |
| 199 | + # Via Dataframe |
| 200 | + df = event_block.to_dataframe() |
| 201 | + df = df[df.event_name == "native_call_convolution.out"] |
| 202 | + if len(df) > 0: |
| 203 | + slowest = df.loc[df["p50"].idxmax()] |
| 204 | + print(slowest.event_name) |
| 205 | + print() |
| 206 | + pp.pprint(slowest.stack_traces) |
| 207 | + print() |
| 208 | + pp.pprint(slowest.module_hierarchy) |
| 209 | + |
| 210 | +###################################################################### |
| 211 | +# If a user wants the total runtime of a module, they can use |
| 212 | +# ``find_total_for_module``. |
| 213 | + |
| 214 | +print(inspector.find_total_for_module("L__self___features")) |
| 215 | +print(inspector.find_total_for_module("L__self___features_14")) |
| 216 | + |
| 217 | +###################################################################### |
211 | 218 | # Note: ``find_total_for_module`` is a special first class method of
|
212 | 219 | # `Inspector <../sdk-inspector.html>`__
|
213 |
| -# |
214 | 220 |
|
215 | 221 | ######################################################################
|
216 | 222 | # Conclusion
|
|
0 commit comments