|
32 | 32 | #
|
33 | 33 | # Set up a conda environment. To set up a conda environment in Google Colab::
|
34 | 34 | #
|
35 |
| -# !pip install -q condacolab |
36 |
| -# import condacolab |
37 |
| -# condacolab.install() |
| 35 | +# !pip install -q condacolab |
| 36 | +# import condacolab |
| 37 | +# condacolab.install() |
38 | 38 | #
|
39 |
| -# !conda create --name executorch python=3.10 |
40 |
| -# !conda install -c conda-forge flatbuffers |
| 39 | +# !conda create --name executorch python=3.10 |
| 40 | +# !conda install -c conda-forge flatbuffers |
41 | 41 | #
|
42 | 42 | # Install ExecuTorch from source. If cloning is failing on Google Colab, make
|
43 | 43 | # sure Colab -> Settings -> GitHub -> Access Private Repo is checked::
|
44 | 44 | #
|
45 |
| -# !git clone https://{github_username}:{token}@github.com/pytorch/executorch.git |
46 |
| -# !cd executorch && bash ./install_requirements.sh |
| 45 | +# !git clone https://{github_username}:{token}@github.com/pytorch/executorch.git |
| 46 | +# !cd executorch && bash ./install_requirements.sh |
47 | 47 |
|
48 | 48 | ######################################################################
|
49 | 49 | # Generate ETRecord (Optional)
|
|
57 | 57 | # edge dialect model (``EdgeProgramManager``), the ExecuTorch dialect model
|
58 | 58 | # (``ExecutorchProgramManager``), and an optional dictionary of additional models
|
59 | 59 | #
|
60 |
| -# In this tutorial, the mobilenet v2 example model is used to demonstrate:: |
61 |
| -# |
62 |
| -# # Imports |
63 |
| -# import copy |
64 |
| -# |
65 |
| -# import torch |
66 |
| -# |
67 |
| -# from executorch.examples.models.mobilenet_v2 import MV2Model |
68 |
| -# from executorch.exir import ( |
69 |
| -# EdgeCompileConfig, |
70 |
| -# EdgeProgramManager, |
71 |
| -# ExecutorchProgramManager, |
72 |
| -# to_edge, |
73 |
| -# ) |
74 |
| -# from executorch.sdk import generate_etrecord |
75 |
| -# from torch.export import export, ExportedProgram |
76 |
| -# |
77 |
| -# # Generate MV2 Model |
78 |
| -# model: torch.nn.Module = MV2Model() |
79 |
| -# |
80 |
| -# aten_model: ExportedProgram = export( |
81 |
| -# model.get_eager_model().eval(), |
82 |
| -# model.get_example_inputs(), |
83 |
| -# ) |
84 |
| -# |
85 |
| -# edge_program_manager: EdgeProgramManager = to_edge( |
86 |
| -# aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True) |
87 |
| -# ) |
88 |
| -# edge_program_manager_copy = copy.deepcopy(edge_program_manager) |
89 |
| -# et_program_manager: ExecutorchProgramManager = edge_program_manager_copy.to_executorch() |
90 |
| -# |
91 |
| -# |
92 |
| -# # Generate ETRecord |
93 |
| -# etrecord_path = "etrecord.bin" |
94 |
| -# generate_etrecord(etrecord_path, edge_program_manager, et_program_manager) |
| 60 | +# In this tutorial, the MobileNet V2 example model is used for demonstration. |
| 61 | + |
| 62 | +import copy |
| 63 | + |
| 64 | +import torch |
| 65 | + |
| 66 | +from executorch.examples.models.mobilenet_v2 import MV2Model |
| 67 | +from executorch.exir import ( |
| 68 | + EdgeCompileConfig, |
| 69 | + EdgeProgramManager, |
| 70 | + ExecutorchProgramManager, |
| 71 | + to_edge, |
| 72 | +) |
| 73 | +from executorch.sdk import generate_etrecord |
| 74 | +from torch.export import export, ExportedProgram |
| 75 | + |
| 76 | + |
| 77 | +# Generate MV2 Model |
| 78 | +model: torch.nn.Module = MV2Model() |
| 79 | + |
| 80 | +aten_model: ExportedProgram = export( |
| 81 | + model.get_eager_model().eval(), |
| 82 | + model.get_example_inputs(), |
| 83 | +) |
| 84 | + |
| 85 | +edge_program_manager: EdgeProgramManager = to_edge( |
| 86 | + aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True) |
| 87 | +) |
| 88 | +edge_program_manager_copy = copy.deepcopy(edge_program_manager) |
| 89 | +et_program_manager: ExecutorchProgramManager = edge_program_manager_copy.to_executorch() |
| 90 | + |
| 91 | + |
| 92 | +# Generate ETRecord |
| 93 | +etrecord_path = "etrecord.bin" |
| 94 | +generate_etrecord(etrecord_path, edge_program_manager, et_program_manager) |
| 95 | + |
| 96 | +# sphinx_gallery_start_ignore |
| 97 | +from unittest.mock import patch |
| 98 | + |
| 99 | +# sphinx_gallery_end_ignore |
| 100 | + |
| 101 | +###################################################################### |
95 | 102 | #
|
96 | 103 | # .. warning::
|
97 | 104 | # Users should do a deepcopy of the output of to_edge() and pass in the
|
|
111 | 118 | #
|
112 | 119 | # Use Buck::
|
113 | 120 | #
|
114 |
| -# python3 -m examples.sdk.scripts.export_bundled_program -m mv2 |
115 |
| -# buck2 run -c executorch.event_tracer_enabled=true examples/sdk/sdk_example_runner:sdk_example_runner -- --bundled_program_path mv2_bundled.bp |
| 121 | +# python3 -m examples.sdk.scripts.export_bundled_program -m mv2 |
| 122 | +# buck2 run -c executorch.event_tracer_enabled=true examples/sdk/sdk_example_runner:sdk_example_runner -- --bundled_program_path mv2_bundled.bp |
116 | 123 | #
|
117 | 124 | # **Option 2:**
|
118 | 125 | #
|
119 | 126 | # Use CMake::
|
120 | 127 | #
|
121 |
| -# cd executorch |
122 |
| -# rm -rf cmake-out && mkdir cmake-out && cd cmake-out && cmake -DBUCK2=buck2 -DEXECUTORCH_BUILD_SDK=1 -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=1 .. |
123 |
| -# cd .. |
124 |
| -# cmake --build cmake-out -j8 -t sdk_example_runner |
125 |
| -# ./cmake-out/examples/sdk/sdk_example_runner --bundled_program_path mv2_bundled.bp |
| 128 | +# cd executorch |
| 129 | +# rm -rf cmake-out && mkdir cmake-out && cd cmake-out && cmake -DBUCK2=buck2 -DEXECUTORCH_BUILD_SDK=1 -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=1 .. |
| 130 | +# cd .. |
| 131 | +# cmake --build cmake-out -j8 -t sdk_example_runner |
| 132 | +# ./cmake-out/examples/sdk/sdk_example_runner --bundled_program_path mv2_bundled.bp |
126 | 133 |
|
127 | 134 | ######################################################################
|
128 | 135 | # Creating an Inspector
|
|
135 | 142 | # Note: An ``ETRecord`` is not required. If an ``ETRecord`` is not provided,
|
136 | 143 | # the Inspector will show runtime results without operator correlation.
|
137 | 144 | #
|
138 |
| -# To visualize all runtime events, call Inspector's ``print_data_tabular``:: |
139 |
| -# |
140 |
| -# from executorch.sdk import Inspector |
141 |
| -# |
142 |
| -# etdump_path = "etdump.etdp" |
143 |
| -# inspector = Inspector(etdump_path=etdump_path, etrecord_path=etrecord_path) |
144 |
| -# inspector.print_data_tabular() |
145 |
| -# |
| 145 | +# To visualize all runtime events, call Inspector's ``print_data_tabular``. |
| 146 | + |
| 147 | +from executorch.sdk import Inspector |
| 148 | + |
| 149 | +# sphinx_gallery_start_ignore |
| 150 | +inspector_patch = patch.object(Inspector, "__init__", return_value=None) |
| 151 | +inspector_patch_print = patch.object(Inspector, "print_data_tabular", return_value=None) |
| 152 | +inspector_patch.start() |
| 153 | +inspector_patch_print.start() |
| 154 | +# sphinx_gallery_end_ignore |
| 155 | +etdump_path = "etdump.etdp" |
| 156 | +inspector = Inspector(etdump_path=etdump_path, etrecord_path=etrecord_path) |
| 157 | +# sphinx_gallery_start_ignore |
| 158 | +inspector.event_blocks = [] |
| 159 | +# sphinx_gallery_end_ignore |
| 160 | +inspector.print_data_tabular() |
| 161 | + |
| 162 | +# sphinx_gallery_start_ignore |
| 163 | +inspector_patch.stop() |
| 164 | +inspector_patch_print.stop() |
| 165 | +# sphinx_gallery_end_ignore |
146 | 166 |
|
147 | 167 | ######################################################################
|
148 | 168 | # Analyzing with an Inspector
|
|
152 | 172 | # and ``DataFrames``. These mediums give users the ability to perform custom
|
153 | 173 | # analysis about their model performance.
|
154 | 174 | #
|
155 |
| -# Below are examples usages, with both ``EventBlock`` and ``DataFrame`` approaches:: |
156 |
| -# |
157 |
| -# # Set Up |
158 |
| -# |
159 |
| -# import pprint as pp |
160 |
| -# |
161 |
| -# import pandas as pd |
162 |
| -# |
163 |
| -# pd.set_option("display.max_colwidth", None) |
164 |
| -# pd.set_option("display.max_columns", None) |
| 175 | +# Below are example usages, with both ``EventBlock`` and ``DataFrame`` approaches. |
| 176 | + |
| 177 | +# Set Up |
| 178 | +import pprint as pp |
| 179 | + |
| 180 | +import pandas as pd |
| 181 | + |
| 182 | +pd.set_option("display.max_colwidth", None) |
| 183 | +pd.set_option("display.max_columns", None) |
165 | 184 |
|
166 | 185 | ######################################################################
|
167 | 186 | # If a user wants the raw profiling results, they would do something similar to
|
168 |
| -# finding the raw runtime data of an ``addmm.out`` event:: |
169 |
| -# |
170 |
| -# for event_block in inspector.event_blocks: |
171 |
| -# # Via EventBlocks |
172 |
| -# for event in event_block.events: |
173 |
| -# if event.name == "native_call_addmm.out": |
174 |
| -# print(event.name, event.perf_data.raw) |
175 |
| -# |
176 |
| -# # Via Dataframe |
177 |
| -# df = event_block.to_dataframe() |
178 |
| -# df = df[df.event_name == "native_call_addmm.out"] |
179 |
| -# print(df[["event_name", "raw"]]) |
180 |
| -# print() |
181 |
| -# |
| 187 | +# finding the raw runtime data of an ``addmm.out`` event. |
| 188 | + |
| 189 | +for event_block in inspector.event_blocks: |
| 190 | + # Via EventBlocks |
| 191 | + for event in event_block.events: |
| 192 | + if event.name == "native_call_addmm.out": |
| 193 | + print(event.name, event.perf_data.raw) |
| 194 | + |
| 195 | + # Via Dataframe |
| 196 | + df = event_block.to_dataframe() |
| 197 | + df = df[df.event_name == "native_call_addmm.out"] |
| 198 | + print(df[["event_name", "raw"]]) |
| 199 | + print() |
182 | 200 |
|
183 | 201 | ######################################################################
|
184 | 202 | # If a user wants to trace an operator back to their model code, they would do
|
185 | 203 | # something similar to finding the module hierarchy and stack trace of the
|
186 |
| -# slowest ``convolution.out`` call:: |
187 |
| -# |
188 |
| -# for event_block in inspector.event_blocks: |
189 |
| -# # Via EventBlocks |
190 |
| -# slowest = None |
191 |
| -# for event in event_block.events: |
192 |
| -# if event.name == "native_call_convolution.out": |
193 |
| -# if slowest is None or event.perf_data.p50 > slowest.perf_data.p50: |
194 |
| -# slowest = event |
195 |
| -# if slowest is not None: |
196 |
| -# print(slowest.name) |
197 |
| -# print() |
198 |
| -# pp.pprint(slowest.stack_traces) |
199 |
| -# print() |
200 |
| -# pp.pprint(slowest.module_hierarchy) |
201 |
| -# |
202 |
| -# # Via Dataframe |
203 |
| -# df = event_block.to_dataframe() |
204 |
| -# df = df[df.event_name == "native_call_convolution.out"] |
205 |
| -# if len(df) > 0: |
206 |
| -# slowest = df.loc[df["p50"].idxmax()] |
207 |
| -# print(slowest.event_name) |
208 |
| -# print() |
209 |
| -# pp.pprint(slowest.stack_traces) |
210 |
| -# print() |
211 |
| -# pp.pprint(slowest.module_hierarchy) |
212 |
| -# |
| 204 | +# slowest ``convolution.out`` call. |
| 205 | + |
| 206 | +for event_block in inspector.event_blocks: |
| 207 | + # Via EventBlocks |
| 208 | + slowest = None |
| 209 | + for event in event_block.events: |
| 210 | + if event.name == "native_call_convolution.out": |
| 211 | + if slowest is None or event.perf_data.p50 > slowest.perf_data.p50: |
| 212 | + slowest = event |
| 213 | + if slowest is not None: |
| 214 | + print(slowest.name) |
| 215 | + print() |
| 216 | + pp.pprint(slowest.stack_traces) |
| 217 | + print() |
| 218 | + pp.pprint(slowest.module_hierarchy) |
| 219 | + |
| 220 | + # Via Dataframe |
| 221 | + df = event_block.to_dataframe() |
| 222 | + df = df[df.event_name == "native_call_convolution.out"] |
| 223 | + if len(df) > 0: |
| 224 | + slowest = df.loc[df["p50"].idxmax()] |
| 225 | + print(slowest.event_name) |
| 226 | + print() |
| 227 | + pp.pprint(slowest.stack_traces) |
| 228 | + print() |
| 229 | + pp.pprint(slowest.module_hierarchy) |
213 | 230 |
|
214 | 231 | ######################################################################
|
215 | 232 | # If a user wants the total runtime of a module, they can use
|
216 |
| -# ``find_total_for_module``:: |
217 |
| -# |
218 |
| -# print(inspector.find_total_for_module("L__self___features")) |
219 |
| -# print(inspector.find_total_for_module("L__self___features_14")) |
| 233 | +# ``find_total_for_module``. |
| 234 | + |
| 235 | +print(inspector.find_total_for_module("L__self___features")) |
| 236 | +print(inspector.find_total_for_module("L__self___features_14")) |
220 | 237 |
|
221 | 238 | ######################################################################
|
222 | 239 | # Note: ``find_total_for_module`` is a special first class method of
|
|
0 commit comments