Add event tracing and ETDumps to executor_runner (#5027)

benkli01 · web-flow · commit 282c137f7f60 · 2025-01-29T15:31:19.000-08:00
* Add event tracing and ETDumps to executor_runner

- Enabled via EXECUTORCH_ENABLE_EVENT_TRACER
- Add flag 'etdump_path' to specify the file path for the ETDump file
- Add flag 'num_executions' for number of iterations to run
- Create and pass event tracer 'ETDumpGen'
- Save ETDump to disk
- Update docs to reflect the changes

Signed-off-by: Benjamin Klimczak <benjamin.klimczak@arm.com>
Change-Id: I7e8e8b7f21453bb8d88fa2b9c2ef66c532f3ea46

* Fix comments during code review

- Raise a CMake error if event tracing is enabled without the devtools
- Refactor the changes in the portable executor_runner
- Minor fix in docs

Change-Id: Ia50fef8172f678f9cbe2b33e2178780ff983f335
Signed-off-by: Benjamin Klimczak <benjamin.klimczak@arm.com>

* Small fix for use of flag `FLAGS_etdump`

Change-Id: I0ebb22636cdd64aea24bcee51cba05496ed78b1f

* Another fix for case ET_EVENT_TRACER_ENABLED=OFF

Change-Id: I7d72e4d8f46ec727a60c9553851d5b71da8e91d4
Signed-off-by: Benjamin Klimczak <benjamin.klimczak@arm.com>

* Revert use of FLATCCRT_LIB to flatccrt

Signed-off-by: Benjamin Klimczak <benjamin.klimczak@arm.com>
Change-Id: I5e7d8ef5d66bc3d5de36ea451b31fb3bdcd42d09

* Fix linker issue when building executor_runner

- Remove explicit addition of `-DET_EVENT_TRACER_ENABLED` from
  backends/qualcomm/CMakeLists.txt as setting the definition without
  enabling cmake flag `EXECUTORCH_ENABLE_EVENT_TRACER` caused issues
  when building the executor_runner.
- Replace deprecated namespace `torch::executor` with
  `executorch::etdump` in the executor_runner.cpp.

Signed-off-by: Benjamin Klimczak <benjamin.klimczak@arm.com>
Change-Id: Iadff38374e661f42e394dc69903548922ca08aea

---------

Signed-off-by: Benjamin Klimczak <benjamin.klimczak@arm.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,4 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -819,6 +820,14 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
     list(APPEND _executor_runner_libs quantized_ops_lib)
   endif()
 
+  if(EXECUTORCH_ENABLE_EVENT_TRACER)
+    if(EXECUTORCH_BUILD_DEVTOOLS)
+      list(APPEND _executor_runner_libs etdump flatccrt)
+    else()
+      message(SEND_ERROR "Use of 'EXECUTORCH_ENABLE_EVENT_TRACER' requires 'EXECUTORCH_BUILD_DEVTOOLS' to be enabled.")
+    endif()
+  endif()
+
   add_executable(executor_runner ${_executor_runner__srcs})
   if(CMAKE_BUILD_TYPE STREQUAL "Release")
     if(APPLE)
diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
@@ -1,4 +1,5 @@
 # Copyright (c) Qualcomm Innovation Center, Inc.
+# Copyright 2025 Arm Limited and/or its affiliates.
 # All rights reserved
 #
 # This source code is licensed under the BSD-style license found in the
@@ -199,11 +200,6 @@ target_link_libraries(
 #
 target_link_options_shared_lib(qnn_executorch_backend)
 
-#
-# add compile option
-#
-target_compile_options(executorch PUBLIC -DET_EVENT_TRACER_ENABLED)
-
 #
 # add sources
 #
diff --git a/backends/xnnpack/CMakeLists.txt b/backends/xnnpack/CMakeLists.txt
@@ -1,4 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -128,8 +129,17 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
   #
   list(TRANSFORM _xnn_executor_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
   add_executable(xnn_executor_runner ${_xnn_executor_runner__srcs})
+
+  if(EXECUTORCH_ENABLE_EVENT_TRACER)
+    if(EXECUTORCH_BUILD_DEVTOOLS)
+      list(APPEND xnn_executor_runner_libs etdump)
+    else()
+      message(SEND_ERROR "Use of 'EXECUTORCH_ENABLE_EVENT_TRACER' requires 'EXECUTORCH_BUILD_DEVTOOLS' to be enabled.")
+    endif()
+  endif()
+
   target_link_libraries(
-    xnn_executor_runner xnnpack_backend gflags portable_ops_lib
+    xnn_executor_runner gflags portable_ops_lib ${xnn_executor_runner_libs}
   )
   target_compile_options(xnn_executor_runner PUBLIC ${_common_compile_options})
 endif()
diff --git a/docs/source/native-delegates-executorch-xnnpack-delegate.md b/docs/source/native-delegates-executorch-xnnpack-delegate.md
@@ -70,7 +70,7 @@ Since weight packing creates an extra copy of the weights inside XNNPACK, We fre
 When executing the XNNPACK subgraphs, we prepare the tensor inputs and outputs and feed them to the XNNPACK runtime graph. After executing the runtime graph, the output pointers are filled with the computed tensors.
 
 #### **Profiling**
-We have enabled basic profiling for XNNPACK delegate that can be enabled with the following compiler flag `-DENABLE_XNNPACK_PROFILING`. With ExecuTorch's Developer Tools integration, you can also now use the Developer Tools to profile the model. You can follow the steps in [Using the ExecuTorch Developer Tools to Profile a Model](./tutorials/devtools-integration-tutorial) on how to profile ExecuTorch models and use Developer Tools' Inspector API to view XNNPACK's internal profiling information.
+Basic profiling for the XNNPACK delegate can be enabled with the compiler flag `-DEXECUTORCH_ENABLE_EVENT_TRACER` (add `-DENABLE_XNNPACK_PROFILING` for additional details). With ExecuTorch's Developer Tools integration, you can now also use the Developer Tools to profile the model. Follow the steps in [Using the ExecuTorch Developer Tools to Profile a Model](./tutorials/devtools-integration-tutorial) to profile ExecuTorch models and use the Developer Tools' Inspector API to view XNNPACK's internal profiling information. An example implementation is available in the `xnn_executor_runner` (see the [profiling section of the XNNPACK lowering tutorial](tutorial-xnnpack-delegate-lowering.md#profiling)).
 
 
 [comment]: <> (TODO: Refactor quantizer to a more official quantization doc)
diff --git a/docs/source/tutorial-xnnpack-delegate-lowering.md b/docs/source/tutorial-xnnpack-delegate-lowering.md
@@ -177,3 +177,6 @@ Now you should be able to find the executable built at `./cmake-out/backends/xnn
 
 ## Building and Linking with the XNNPACK Backend
 You can build the XNNPACK backend [CMake target](https://github.com/pytorch/executorch/blob/main/backends/xnnpack/CMakeLists.txt#L83), and link it with your application binary such as an Android or iOS application. For more information on this you may take a look at this [resource](demo-apps-android.md) next.
+
+## Profiling
+To enable profiling in the `xnn_executor_runner`, pass the flags `-DEXECUTORCH_ENABLE_EVENT_TRACER=ON` and `-DEXECUTORCH_BUILD_DEVTOOLS=ON` to the build command (add `-DENABLE_XNNPACK_PROFILING=ON` for additional details). This enables ETDump generation when running inference and adds command line flags for profiling (see `xnn_executor_runner --help` for details).
diff --git a/examples/portable/executor_runner/executor_runner.cpp b/examples/portable/executor_runner/executor_runner.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * Copyright 2024-2025 Arm Limited and/or its affiliates.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
@@ -25,10 +26,14 @@
 #include <executorch/extension/data_loader/file_data_loader.h>
 #include <executorch/extension/evalue_util/print_evalue.h>
 #include <executorch/extension/runner_util/inputs.h>
+#include <executorch/runtime/core/event_tracer.h>
 #include <executorch/runtime/executor/method.h>
 #include <executorch/runtime/executor/program.h>
 #include <executorch/runtime/platform/log.h>
 #include <executorch/runtime/platform/runtime.h>
+#ifdef ET_EVENT_TRACER_ENABLED
+#include <executorch/devtools/etdump/etdump_flatcc.h>
+#endif // ET_EVENT_TRACER_ENABLED
 
 static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB
 
@@ -38,10 +43,15 @@ DEFINE_string(
     model_path,
     "model.pte",
     "Model serialized in flatbuffer format.");
+DEFINE_uint32(num_executions, 1, "Number of times to run the model.");
+#ifdef ET_EVENT_TRACER_ENABLED
+DEFINE_string(etdump_path, "model.etdump", "Write ETDump data to this path.");
+#endif // ET_EVENT_TRACER_ENABLED
 
 using executorch::extension::FileDataLoader;
 using executorch::runtime::Error;
 using executorch::runtime::EValue;
+using executorch::runtime::EventTracer;
 using executorch::runtime::HierarchicalAllocator;
 using executorch::runtime::MemoryAllocator;
 using executorch::runtime::MemoryManager;
@@ -51,6 +61,56 @@ using executorch::runtime::Program;
 using executorch::runtime::Result;
 using executorch::runtime::Span;
 
+/// Helper that owns the event tracer used for ETDump generation.
+///
+/// When ET_EVENT_TRACER_ENABLED is defined the manager creates an
+/// executorch::etdump::ETDumpGen; otherwise it holds no tracer and
+/// get_event_tracer() returns nullptr.
+class EventTraceManager {
+ public:
+  EventTraceManager() : event_tracer_ptr_(nullptr) {
+#ifdef ET_EVENT_TRACER_ENABLED
+    event_tracer_ptr_ = std::make_shared<executorch::etdump::ETDumpGen>();
+#endif // ET_EVENT_TRACER_ENABLED
+  }
+
+  /// Returns the managed tracer (non-owning), or nullptr if none was created.
+  EventTracer* get_event_tracer() const {
+    return event_tracer_ptr_.get();
+  }
+
+  /// Writes the collected ETDump data to the file named by --etdump_path.
+  ///
+  /// @returns Error::Ok on success; Error::NotSupported if there is no event
+  ///     tracer; Error::AccessFailed if the file cannot be opened or fully
+  ///     written; Error::NotFound if the tracer produced no data.
+  Error write_etdump_to_file() const {
+    EventTracer* const event_tracer_ptr = get_event_tracer();
+    if (!event_tracer_ptr) {
+      return Error::NotSupported;
+    }
+
+#ifdef ET_EVENT_TRACER_ENABLED
+    // The constructor only ever stores an ETDumpGen, so the downcast is safe.
+    executorch::etdump::ETDumpGen* const etdump_ptr =
+        static_cast<executorch::etdump::ETDumpGen*>(event_tracer_ptr);
+
+    const char* filename = FLAGS_etdump_path.c_str();
+
+    // The ETDump is raw binary data: open in binary mode so that no newline
+    // translation is applied on platforms that distinguish text from binary.
+    std::unique_ptr<FILE, decltype(&fclose)> etdump_file(
+        fopen(filename, "wb"), fclose);
+    if (!etdump_file) {
+      ET_LOG(Error, "Failed to open ETDump file at %s.", filename);
+      return Error::AccessFailed;
+    }
+
+    executorch::etdump::ETDumpResult result = etdump_ptr->get_etdump_data();
+    if (result.buf == nullptr || result.size == 0) {
+      ET_LOG(Error, "No ETDump data available!");
+      return Error::NotFound;
+    }
+
+    const size_t bytes_written =
+        fwrite(result.buf, 1, result.size, etdump_file.get());
+    // Release the buffer before inspecting the result so that it is freed on
+    // both the success and the failure path.
+    free(result.buf);
+    if (bytes_written != result.size) {
+      ET_LOG(Error, "Failed to write ETDump data to file '%s'.", filename);
+      return Error::AccessFailed;
+    }
+    ET_LOG(Info, "ETDump written to file '%s'.", filename);
+#endif // ET_EVENT_TRACER_ENABLED
+
+    return Error::Ok;
+  }
+
+ private:
+  std::shared_ptr<EventTracer> event_tracer_ptr_;
+};
+
 int main(int argc, char** argv) {
   executorch::runtime::runtime_init();
 
@@ -158,8 +218,9 @@ int main(int argc, char** argv) {
   // the method can mutate the memory-planned buffers, so the method should only
   // be used by a single thread at at time, but it can be reused.
   //
-
-  Result<Method> method = program->load_method(method_name, &memory_manager);
+  EventTraceManager tracer;
+  Result<Method> method = program->load_method(
+      method_name, &memory_manager, tracer.get_event_tracer());
   ET_CHECK_MSG(
       method.ok(),
       "Loading of method %s failed with status 0x%" PRIx32,
@@ -178,24 +239,36 @@ int main(int argc, char** argv) {
   ET_LOG(Info, "Inputs prepared.");
 
   // Run the model.
-  Error status = method->execute();
-  ET_CHECK_MSG(
-      status == Error::Ok,
-      "Execution of method %s failed with status 0x%" PRIx32,
-      method_name,
-      (uint32_t)status);
-  ET_LOG(Info, "Model executed successfully.");
+  for (uint32_t i = 0; i < FLAGS_num_executions; i++) {
+    Error status = method->execute();
+    ET_CHECK_MSG(
+        status == Error::Ok,
+        "Execution of method %s failed with status 0x%" PRIx32,
+        method_name,
+        (uint32_t)status);
+  }
+  ET_LOG(
+      Info,
+      "Model executed successfully %" PRIu32 " time(s).",
+      FLAGS_num_executions);
 
   // Print the outputs.
   std::vector<EValue> outputs(method->outputs_size());
   ET_LOG(Info, "%zu outputs: ", outputs.size());
-  status = method->get_outputs(outputs.data(), outputs.size());
+  Error status = method->get_outputs(outputs.data(), outputs.size());
   ET_CHECK(status == Error::Ok);
   // Print the first and last 100 elements of long lists of scalars.
   std::cout << executorch::extension::evalue_edge_items(100);
   for (int i = 0; i < outputs.size(); ++i) {
     std::cout << "Output " << i << ": " << outputs[i] << std::endl;
   }
 
+  if (tracer.get_event_tracer()) {
+    // Dump ETDump data containing profiling/debugging data to file specified in
+    // command line flag.
+    Error status = tracer.write_etdump_to_file();
+    ET_CHECK_MSG(status == Error::Ok, "Failed to save ETDump file.");
+  }
+
   return 0;
 }

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`# Copyright (c) Qualcomm Innovation Center, Inc.`
	`2`	`+# Copyright 2025 Arm Limited and/or its affiliates.`
`2`	`3`	`# All rights reserved`
`3`	`4`	`#`
`4`	`5`	`# This source code is licensed under the BSD-style license found in the`
`@@ -199,11 +200,6 @@ target_link_libraries(`
`199`	`200`	`#`
`200`	`201`	`target_link_options_shared_lib(qnn_executorch_backend)`
`201`	`202`
`202`		`-#`
`203`		`-# add compile option`
`204`		`-#`
`205`		`-target_compile_options(executorch PUBLIC -DET_EVENT_TRACER_ENABLED)`
`206`		`-`
`207`	`203`	`#`
`208`	`204`	`# add sources`
`209`	`205`	`#`