|
57 | 57 | # edge dialect model (``EdgeProgramManager``), the ExecuTorch dialect model
|
58 | 58 | # (``ExecutorchProgramManager``), and an optional dictionary of additional models
|
59 | 59 | #
|
60 |
| -# In this tutorial, the mobilenet v2 example model is used to demonstrate:: |
61 |
| -# |
| 60 | +# In this tutorial, the mobilenet v2 example model is used as a demonstration. |
| 61 | + |
| 62 | +###################################################################### |
62 | 63 | # # Imports
|
63 | 64 | # import copy
|
| 65 | +# |
64 | 66 | # import torch
|
65 | 67 | #
|
66 | 68 | # from executorch.examples.models.mobilenet_v2 import MV2Model
|
|
81 | 83 | # model.get_example_inputs(),
|
82 | 84 | # )
|
83 | 85 | #
|
84 |
| -# edge_program_manager: EdgeProgramManager = to_edge(aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True)) |
| 86 | +# edge_program_manager: EdgeProgramManager = to_edge( |
| 87 | +# aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True) |
| 88 | +# ) |
85 | 89 | # edge_program_manager_copy = copy.deepcopy(edge_program_manager)
|
86 | 90 | # et_program_manager: ExecutorchProgramManager = edge_program_manager_copy.to_executorch()
|
87 | 91 | #
|
88 | 92 | #
|
89 | 93 | # # Generate ETRecord
|
90 | 94 | # etrecord_path = "etrecord.bin"
|
91 | 95 | # generate_etrecord(etrecord_path, edge_program_manager, et_program_manager)
|
92 |
| -# |
| 96 | + |
| 97 | +###################################################################### |
93 | 98 | # .. warning::
|
94 | 99 | # Users should do a deepcopy of the output of to_edge() and pass in the
|
95 | 100 | # deepcopy to the generate_etrecord API. This is needed because the
|
|
131 | 136 | # Note: An ``ETRecord`` is not required. If an ``ETRecord`` is not provided,
|
132 | 137 | # the Inspector will show runtime results without operator correlation.
|
133 | 138 | #
|
134 |
| -# To visualize all runtime events, call ``print_data_tabular``:: |
135 |
| -# |
| 139 | +# To visualize all runtime events, call Inspector's ``print_data_tabular``. |
| 140 | + |
| 141 | +###################################################################### |
136 | 142 | # from executorch.sdk import Inspector
|
137 | 143 | #
|
| 144 | +# etrecord_path = "etrecord.bin" |
138 | 145 | # etdump_path = "etdump.etdp"
|
139 | 146 | # inspector = Inspector(etdump_path=etdump_path, etrecord_path=etrecord_path)
|
140 | 147 | # inspector.print_data_tabular()
|
141 |
| -# |
142 |
| - |
143 | 148 |
|
144 | 149 | ######################################################################
|
145 | 150 | # Analyzing with an Inspector
|
|
149 | 154 | # and ``DataFrames``. These mediums give users the ability to perform custom
|
150 | 155 | # analysis about their model performance.
|
151 | 156 | #
|
152 |
| -# Below are examples usages, with both ``EventBlock`` and ``DataFrame`` approaches:: |
153 |
| -# |
| 157 | +# Below are example usages, with both ``EventBlock`` and ``DataFrame`` approaches. |
| 158 | + |
| 159 | +###################################################################### |
154 | 160 | # # Set Up
|
155 |
| -# |
156 | 161 | # import pprint as pp
|
157 |
| -# import pandas as pd |
158 | 162 | #
|
159 |
| -# pd.set_option('display.max_colwidth', None) |
160 |
| -# pd.set_option('display.max_columns', None) |
| 163 | +# import pandas as pd |
161 | 164 | #
|
| 165 | +# pd.set_option("display.max_colwidth", None) |
| 166 | +# pd.set_option("display.max_columns", None) |
| 167 | + |
| 168 | +###################################################################### |
162 | 169 | # If a user wants the raw profiling results, they would do something similar to
|
163 |
| -# finding the raw runtime data of an ``addmm.out`` event:: |
164 |
| -# |
| 170 | +# finding the raw runtime data of an ``addmm.out`` event. |
| 171 | + |
| 172 | +###################################################################### |
165 | 173 | # for event_block in inspector.event_blocks:
|
166 | 174 | # # Via EventBlocks
|
167 | 175 | # for event in event_block.events:
|
168 |
| -# if event.name == 'native_call_addmm.out': |
| 176 | +# if event.name == "native_call_addmm.out": |
169 | 177 | # print(event.name, event.perf_data.raw)
|
170 | 178 | #
|
171 | 179 | # # Via Dataframe
|
172 | 180 | # df = event_block.to_dataframe()
|
173 |
| -# df = df[df.event_name == 'native_call_addmm.out'] |
174 |
| -# print(df[['event_name', 'raw']]) |
| 181 | +# df = df[df.event_name == "native_call_addmm.out"] |
| 182 | +# print(df[["event_name", "raw"]]) |
175 | 183 | # print()
|
176 |
| -# |
| 184 | + |
| 185 | +###################################################################### |
177 | 186 | # If a user wants to trace an operator back to their model code, they would do
|
178 | 187 | # something similar to finding the module hierarchy and stack trace of the
|
179 |
| -# slowest ``convolution.out`` call:: |
180 |
| -# |
| 188 | +# slowest ``convolution.out`` call. |
| 189 | + |
| 190 | +###################################################################### |
181 | 191 | # for event_block in inspector.event_blocks:
|
182 | 192 | # # Via EventBlocks
|
183 | 193 | # slowest = None
|
184 | 194 | # for event in event_block.events:
|
185 |
| -# if event.name == 'native_call_convolution.out': |
| 195 | +# if event.name == "native_call_convolution.out": |
186 | 196 | # if slowest is None or event.perf_data.p50 > slowest.perf_data.p50:
|
187 | 197 | # slowest = event
|
188 | 198 | # if slowest is not None:
|
189 | 199 | # print(slowest.name)
|
190 | 200 | # print()
|
191 | 201 | # pp.pprint(slowest.stack_traces)
|
192 | 202 | # print()
|
193 |
| -# pp.pprint(slowest.module_hierarchy |
| 203 | +# pp.pprint(slowest.module_hierarchy) |
194 | 204 | #
|
195 | 205 | # # Via Dataframe
|
196 | 206 | # df = event_block.to_dataframe()
|
197 |
| -# df = df[df.event_name == 'native_call_convolution.out'] |
| 207 | +# df = df[df.event_name == "native_call_convolution.out"] |
198 | 208 | # if len(df) > 0:
|
199 |
| -# slowest = df.loc[df['p50'].idxmax()] |
| 209 | +# slowest = df.loc[df["p50"].idxmax()] |
200 | 210 | # print(slowest.event_name)
|
201 | 211 | # print()
|
202 | 212 | # pp.pprint(slowest.stack_traces)
|
203 | 213 | # print()
|
204 | 214 | # pp.pprint(slowest.module_hierarchy)
|
205 |
| -# |
206 |
| -# If a user wants the total runtime of a module:: |
207 |
| -# |
| 215 | + |
| 216 | +###################################################################### |
| 217 | +# If a user wants the total runtime of a module, they can use |
| 218 | +# ``find_total_for_module``. |
| 219 | + |
| 220 | +###################################################################### |
208 | 221 | # print(inspector.find_total_for_module("L__self___features"))
|
209 | 222 | # print(inspector.find_total_for_module("L__self___features_14"))
|
210 |
| -# |
| 223 | + |
| 224 | +###################################################################### |
211 | 225 | # Note: ``find_total_for_module`` is a special first class method of
|
212 | 226 | # `Inspector <../sdk-inspector.html>`__
|
213 |
| -# |
214 | 227 |
|
215 | 228 | ######################################################################
|
216 | 229 | # Conclusion
|
|
0 commit comments