Revert D54763963: Add etdump generation to llama_runner

mcr229 · facebook-github-bot · commit d52ebdcbeb69 · 2024-03-22T14:31:19.000-07:00
Differential Revision: D54763963

Original commit changeset: 2e0bcaf0451d

Original Phabricator Diff: D54763963

fbshipit-source-id: c51e943773fc5bf73e82b7517b17b69013494298
diff --git a/examples/models/llama2/main.cpp b/examples/models/llama2/main.cpp
@@ -39,11 +39,6 @@ DEFINE_int32(
     -1,
     "Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");
 
-DEFINE_string(
-    etdump_path,
-    "llama_etdump.etdp",
-    "Where to write the llama etdump.");
-
 int32_t main(int32_t argc, char** argv) {
   gflags::ParseCommandLineFlags(&argc, &argv, true);
 
@@ -79,8 +74,5 @@ int32_t main(int32_t argc, char** argv) {
   // generate
   runner.generate(prompt, seq_len);
 
-  // dump etdump profiling data
-  runner.dump_etdump(FLAGS_etdump_path);
-
   return 0;
 }
diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp
@@ -24,7 +24,6 @@
 #include <executorch/runtime/core/exec_aten/exec_aten.h>
 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 #include <executorch/runtime/platform/log.h>
-#include <executorch/sdk/etdump/etdump_flatcc.h>
 
 namespace torch::executor {
 namespace {
@@ -35,14 +34,11 @@ Runner::Runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     const float temperature)
-    : tokenizer_path_(tokenizer_path), temperature_(temperature) {
-  std::unique_ptr<torch::executor::ETDumpGen> etdump_gen_ =
-      std::make_unique<torch::executor::ETDumpGen>();
-
-  module_ = std::make_unique<Module>(
-      model_path,
-      Module::MlockConfig::UseMlockIgnoreErrors,
-      std::move(etdump_gen_));
+    : module_(std::make_unique<Module>(
+          model_path,
+          Module::MlockConfig::UseMlockIgnoreErrors)),
+      tokenizer_path_(tokenizer_path),
+      temperature_(temperature) {
   ET_LOG(
       Info,
       "Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
@@ -390,25 +386,6 @@ Error Runner::generate(
   return Error::Ok;
 }
 
-Error Runner::dump_etdump(std::string etdump_path) {
-#ifdef ET_EVENT_TRACER_ENABLED
-  torch::executor::ETDumpGen* etdump_gen =
-      static_cast<torch::executor::ETDumpGen*>(module_->event_tracer());
-
-  ET_LOG(Info, "ETDump size: %zu blocks", etdump_gen->get_num_blocks());
-  etdump_result result = etdump_gen->get_etdump_data();
-  if (result.buf != nullptr && result.size > 0) {
-    // On a device with a file system users can just write it out
-    // to the file-system.
-    FILE* f = fopen(etdump_path.c_str(), "w+");
-    fwrite((uint8_t*)result.buf, 1, result.size, f);
-    fclose(f);
-    free(result.buf);
-  }
-#endif
-  return Error::Ok;
-}
-
 void Runner::TimeStamps::printReport(
     const int64_t& num_prompt_tokens,
     const int64_t& num_generated_tokens) {
diff --git a/examples/models/llama2/runner/runner.h b/examples/models/llama2/runner/runner.h
@@ -20,7 +20,6 @@
 #include <executorch/examples/models/llama2/sampler/sampler.h>
 #include <executorch/examples/models/llama2/tokenizer/tokenizer.h>
 #include <executorch/extension/module/module.h>
-#include <executorch/sdk/etdump/etdump_flatcc.h>
 
 namespace torch::executor {
 
@@ -38,7 +37,6 @@ class Runner {
       int32_t seq_len = 128,
       std::function<void(const std::string&)> callback = {});
   void stop();
-  Error dump_etdump(std::string etdump_path);
 
  private:
   // metadata
@@ -94,7 +92,6 @@ class Runner {
         const int64_t& num_prompt_tokens,
         const int64_t& num_generated_tokens);
   };
-
   TimeStamps timers_;
 };
 
diff --git a/examples/models/llama2/runner/targets.bzl b/examples/models/llama2/runner/targets.bzl
@@ -36,7 +36,6 @@ def define_common_targets():
                 "//executorch/extension/module:module" + aten_suffix,
                 "//executorch/kernels/quantized:generated_lib" + aten_suffix,
                 "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
-                "//executorch/sdk/etdump:etdump_flatcc",
             ] + (_get_operator_lib(aten)) + ([
                 # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
                 # Therefore enable it explicitly for now to avoid failing tests