|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +# Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | +# All rights reserved. |
| 4 | +# |
| 5 | +# This source code is licensed under the BSD-style license found in the |
| 6 | +# LICENSE file in the root directory of this source tree. |
| 7 | + |
| 8 | +""" |
| 9 | +Using the ExecuTorch Developer Tools to Profile a Model |
| 10 | +======================== |
| 11 | +
|
| 12 | +**Author:** `Jack Khuu <https://github.com/Jack-Khuu>`__ |
| 13 | +""" |
| 14 | + |
| 15 | +###################################################################### |
| 16 | +# The `ExecuTorch Developer Tools <../sdk-overview.html>`__ is a set of tools designed to |
| 17 | +# provide users with the ability to profile, debug, and visualize ExecuTorch |
| 18 | +# models. |
| 19 | +# |
| 20 | +# This tutorial will show a full end-to-end flow of how to utilize the Developer Tools to profile a model. |
| 21 | +# Specifically, it will: |
| 22 | +# |
| 23 | +# 1. Generate the artifacts consumed by the Developer Tools (`ETRecord <../sdk-etrecord.html>`__, `ETDump <../sdk-etdump.html>`__). |
| 24 | +# 2. Create an Inspector class consuming these artifacts. |
| 25 | +# 3. Utilize the Inspector class to analyze the model profiling result. |
| 26 | + |
| 27 | +###################################################################### |
| 28 | +# Prerequisites |
| 29 | +# ------------- |
| 30 | +# |
| 31 | +# To run this tutorial, you’ll first need to |
| 32 | +# `Set up your ExecuTorch environment <../getting-started-setup.html>`__. |
| 33 | +# |
| 34 | + |
| 35 | +###################################################################### |
| 36 | +# Generate ETRecord (Optional) |
| 37 | +# ---------------------------- |
| 38 | +# |
| 39 | +# The first step is to generate an ``ETRecord``. ``ETRecord`` contains model |
| 40 | +# graphs and metadata for linking runtime results (such as profiling) to |
| 41 | +# the eager model. This is generated via ``executorch.devtools.generate_etrecord``. |
| 42 | +# |
| 43 | +# ``executorch.devtools.generate_etrecord`` takes in an output file path (str), the |
| 44 | +# edge dialect model (``EdgeProgramManager``), the ExecuTorch dialect model |
| 45 | +# (``ExecutorchProgramManager``), and an optional dictionary of additional models. |
| 46 | +# |
| 47 | +# In this tutorial, an example model (shown below) is used to demonstrate. |
| 48 | + |
| 49 | +import copy |
| 50 | + |
| 51 | +import torch |
| 52 | +import torch.nn as nn |
| 53 | +import torch.nn.functional as F |
| 54 | +from executorch.devtools import generate_etrecord |
| 55 | + |
| 56 | +from executorch.exir import ( |
| 57 | + EdgeCompileConfig, |
| 58 | + EdgeProgramManager, |
| 59 | + ExecutorchProgramManager, |
| 60 | + to_edge, |
| 61 | +) |
| 62 | +from torch.export import export, ExportedProgram |
| 63 | + |
| 64 | + |
| 65 | +# Generate Model |
| 66 | +class Net(nn.Module): |
| 67 | + def __init__(self): |
| 68 | + super(Net, self).__init__() |
| 69 | + # 1 input image channel, 6 output channels, 5x5 square convolution |
| 70 | + # kernel |
| 71 | + self.conv1 = nn.Conv2d(1, 6, 5) |
| 72 | + self.conv2 = nn.Conv2d(6, 16, 5) |
| 73 | + # an affine operation: y = Wx + b |
| 74 | + self.fc1 = nn.Linear(16 * 5 * 5, 120) # 5*5 from image dimension |
| 75 | + self.fc2 = nn.Linear(120, 84) |
| 76 | + self.fc3 = nn.Linear(84, 10) |
| 77 | + |
| 78 | + def forward(self, x): |
| 79 | + # Max pooling over a (2, 2) window |
| 80 | + x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) |
| 81 | + # If the size is a square, you can specify with a single number |
| 82 | + x = F.max_pool2d(F.relu(self.conv2(x)), 2) |
| 83 | + x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension |
| 84 | + x = F.relu(self.fc1(x)) |
| 85 | + x = F.relu(self.fc2(x)) |
| 86 | + x = self.fc3(x) |
| 87 | + return x |
| 88 | + |
| 89 | + |
| 90 | +model = Net() |
| 91 | + |
| 92 | +aten_model: ExportedProgram = export( |
| 93 | + model, |
| 94 | + (torch.randn(1, 1, 32, 32),), |
| 95 | +) |
| 96 | + |
| 97 | +edge_program_manager: EdgeProgramManager = to_edge( |
| 98 | + aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True) |
| 99 | +) |
| 100 | +edge_program_manager_copy = copy.deepcopy(edge_program_manager) |
| 101 | +et_program_manager: ExecutorchProgramManager = edge_program_manager.to_executorch() |
| 102 | + |
| 103 | + |
| 104 | +# Generate ETRecord |
| 105 | +etrecord_path = "etrecord.bin" |
| 106 | +generate_etrecord(etrecord_path, edge_program_manager_copy, et_program_manager) |
| 107 | + |
| 108 | +# sphinx_gallery_start_ignore |
| 109 | +from unittest.mock import patch |
| 110 | + |
| 111 | +# sphinx_gallery_end_ignore |
| 112 | + |
| 113 | +###################################################################### |
| 114 | +# |
| 115 | +# .. warning:: |
| 116 | +# Users should do a deepcopy of the output of ``to_edge()`` and pass in the |
| 117 | +# deepcopy to the ``generate_etrecord`` API. This is needed because the |
| 118 | +# subsequent call, ``to_executorch()``, does an in-place mutation and will |
| 119 | +# lose debug data in the process. |
| 120 | +# |
| 121 | + |
| 122 | +###################################################################### |
| 123 | +# Generate ETDump |
| 124 | +# --------------- |
| 125 | +# |
| 126 | +# Next step is to generate an ``ETDump``. ``ETDump`` contains runtime results |
| 127 | +# from executing a `Bundled Program Model <../sdk-bundled-io.html>`__. |
| 128 | +# |
| 129 | +# In this tutorial, a `Bundled Program` is created from the example model above. |
| 130 | + |
| 131 | +import torch |
| 132 | +from executorch.devtools import BundledProgram |
| 133 | + |
| 134 | +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite |
| 135 | +from executorch.devtools.bundled_program.serialize import ( |
| 136 | + serialize_from_bundled_program_to_flatbuffer, |
| 137 | +) |
| 138 | + |
| 139 | +from executorch.exir import to_edge |
| 140 | +from torch.export import export |
| 141 | + |
| 142 | +# Step 1: ExecuTorch Program Export |
| 143 | +m_name = "forward" |
| 144 | +method_graphs = {m_name: export(model, (torch.randn(1, 1, 32, 32),))} |
| 145 | + |
| 146 | +# Step 2: Construct Method Test Suites |
| 147 | +inputs = [[torch.randn(1, 1, 32, 32)] for _ in range(2)] |
| 148 | + |
| 149 | +method_test_suites = [ |
| 150 | + MethodTestSuite( |
| 151 | + method_name=m_name, |
| 152 | + test_cases=[ |
| 153 | + MethodTestCase(inputs=inp, expected_outputs=getattr(model, m_name)(*inp)) |
| 154 | + for inp in inputs |
| 155 | + ], |
| 156 | + ) |
| 157 | +] |
| 158 | + |
| 159 | +# Step 3: Generate BundledProgram |
| 160 | +executorch_program = to_edge(method_graphs).to_executorch() |
| 161 | +bundled_program = BundledProgram(executorch_program, method_test_suites) |
| 162 | + |
| 163 | +# Step 4: Serialize BundledProgram to flatbuffer. |
| 164 | +serialized_bundled_program = serialize_from_bundled_program_to_flatbuffer( |
| 165 | + bundled_program |
| 166 | +) |
| 167 | +save_path = "bundled_program.bp" |
| 168 | +with open(save_path, "wb") as f: |
| 169 | + f.write(serialized_bundled_program) |
| 170 | + |
| 171 | +###################################################################### |
| 172 | +# Use CMake (follow `these instructions <../runtime-build-and-cross-compilation.html#configure-the-cmake-build>`__ to set up cmake) to execute the Bundled Program to generate the ``ETDump``:: |
| 173 | +# |
| 174 | +# cd executorch |
| 175 | +# ./examples/devtools/build_example_runner.sh |
| 176 | +# cmake-out/examples/devtools/example_runner --bundled_program_path="bundled_program.bp" |
| 177 | + |
| 178 | +###################################################################### |
| 179 | +# Creating an Inspector |
| 180 | +# --------------------- |
| 181 | +# |
| 182 | +# Final step is to create the ``Inspector`` by passing in the artifact paths. |
| 183 | +# Inspector takes the runtime results from ``ETDump`` and correlates them to |
| 184 | +# the operators of the Edge Dialect Graph. |
| 185 | +# |
| 186 | +# Recall: An ``ETRecord`` is not required. If an ``ETRecord`` is not provided, |
| 187 | +# the Inspector will show runtime results without operator correlation. |
| 188 | +# |
| 189 | +# To visualize all runtime events, call Inspector's ``print_data_tabular``. |
| 190 | + |
| 191 | +from executorch.devtools import Inspector |
| 192 | + |
| 193 | +# sphinx_gallery_start_ignore |
| 194 | +inspector_patch = patch.object(Inspector, "__init__", return_value=None) |
| 195 | +inspector_patch_print = patch.object(Inspector, "print_data_tabular", return_value="") |
| 196 | +inspector_patch.start() |
| 197 | +inspector_patch_print.start() |
| 198 | +# sphinx_gallery_end_ignore |
| 199 | +etdump_path = "etdump.etdp" |
| 200 | +inspector = Inspector(etdump_path=etdump_path, etrecord=etrecord_path) |
| 201 | +# sphinx_gallery_start_ignore |
| 202 | +inspector.event_blocks = [] |
| 203 | +# sphinx_gallery_end_ignore |
| 204 | +inspector.print_data_tabular() |
| 205 | + |
| 206 | +# sphinx_gallery_start_ignore |
| 207 | +inspector_patch.stop() |
| 208 | +inspector_patch_print.stop() |
| 209 | +# sphinx_gallery_end_ignore |
| 210 | + |
| 211 | +###################################################################### |
| 212 | +# Analyzing with an Inspector |
| 213 | +# --------------------------- |
| 214 | +# |
| 215 | +# ``Inspector`` provides 2 ways of accessing ingested information: `EventBlocks <../sdk-inspector#eventblock-class>`__ |
| 216 | +# and ``DataFrames``. These mediums give users the ability to perform custom |
| 217 | +# analysis about their model performance. |
| 218 | +# |
| 219 | +# Below are examples usages, with both ``EventBlock`` and ``DataFrame`` approaches. |
| 220 | + |
| 221 | +# Set Up |
| 222 | +import pprint as pp |
| 223 | + |
| 224 | +import pandas as pd |
| 225 | + |
| 226 | +pd.set_option("display.max_colwidth", None) |
| 227 | +pd.set_option("display.max_columns", None) |
| 228 | + |
| 229 | +###################################################################### |
| 230 | +# If a user wants the raw profiling results, they would do something similar to |
| 231 | +# finding the raw runtime data of an ``addmm.out`` event. |
| 232 | + |
| 233 | +for event_block in inspector.event_blocks: |
| 234 | + # Via EventBlocks |
| 235 | + for event in event_block.events: |
| 236 | + if event.name == "native_call_addmm.out": |
| 237 | + print(event.name, event.perf_data.raw) |
| 238 | + |
| 239 | + # Via Dataframe |
| 240 | + df = event_block.to_dataframe() |
| 241 | + df = df[df.event_name == "native_call_addmm.out"] |
| 242 | + print(df[["event_name", "raw"]]) |
| 243 | + print() |
| 244 | + |
| 245 | +###################################################################### |
| 246 | +# If a user wants to trace an operator back to their model code, they would do |
| 247 | +# something similar to finding the module hierarchy and stack trace of the |
| 248 | +# slowest ``convolution.out`` call. |
| 249 | + |
| 250 | +for event_block in inspector.event_blocks: |
| 251 | + # Via EventBlocks |
| 252 | + slowest = None |
| 253 | + for event in event_block.events: |
| 254 | + if event.name == "native_call_convolution.out": |
| 255 | + if slowest is None or event.perf_data.p50 > slowest.perf_data.p50: |
| 256 | + slowest = event |
| 257 | + if slowest is not None: |
| 258 | + print(slowest.name) |
| 259 | + print() |
| 260 | + pp.pprint(slowest.stack_traces) |
| 261 | + print() |
| 262 | + pp.pprint(slowest.module_hierarchy) |
| 263 | + |
| 264 | + # Via Dataframe |
| 265 | + df = event_block.to_dataframe() |
| 266 | + df = df[df.event_name == "native_call_convolution.out"] |
| 267 | + if len(df) > 0: |
| 268 | + slowest = df.loc[df["p50"].idxmax()] |
| 269 | + print(slowest.event_name) |
| 270 | + print() |
| 271 | + pp.pprint(slowest.stack_traces) |
| 272 | + print() |
| 273 | + pp.pprint(slowest.module_hierarchy) |
| 274 | + |
| 275 | +###################################################################### |
| 276 | +# If a user wants the total runtime of a module, they can use |
| 277 | +# ``find_total_for_module``. |
| 278 | + |
| 279 | +print(inspector.find_total_for_module("L__self__")) |
| 280 | +print(inspector.find_total_for_module("L__self___conv2")) |
| 281 | + |
| 282 | +###################################################################### |
| 283 | +# Note: ``find_total_for_module`` is a special first class method of |
| 284 | +# `Inspector <../sdk-inspector.html>`__ |
| 285 | + |
| 286 | +###################################################################### |
| 287 | +# Conclusion |
| 288 | +# ---------- |
| 289 | +# |
| 290 | +# In this tutorial, we learned about the steps required to consume an ExecuTorch |
| 291 | +# model with the ExecuTorch Developer Tools. It also showed how to use the Inspector APIs |
| 292 | +# to analyze the model run results. |
| 293 | +# |
| 294 | +# Links Mentioned |
| 295 | +# ^^^^^^^^^^^^^^^ |
| 296 | +# |
| 297 | +# - `ExecuTorch Developer Tools Overview <../sdk-overview.html>`__ |
| 298 | +# - `ETRecord <../sdk-etrecord.html>`__ |
| 299 | +# - `ETDump <../sdk-etdump.html>`__ |
| 300 | +# - `Inspector <../sdk-inspector.html>`__ |
0 commit comments