Add buck build for static llama runner

cccclai · web-flow · commit 50b4ac3b1b41 · 2024-11-20T22:51:55.000-08:00
Differential Revision: D66107963 Pull Request resolved: #6950
diff --git a/examples/qualcomm/oss_scripts/llama3_2/TARGETS b/examples/qualcomm/oss_scripts/llama3_2/TARGETS
@@ -0,0 +1,8 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain xplat-only targets.
+
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
diff --git a/examples/qualcomm/oss_scripts/llama3_2/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama3_2/runner/runner.cpp
@@ -43,12 +43,12 @@ Runner::Runner(
     const std::vector<std::string>& models_path,
     const std::string& tokenizer_path,
     const float temperature)
-    : tokenizer_path_(tokenizer_path),
-      temperature_(temperature),
-      n_bos_(1),
+    : n_bos_(1),
       n_eos_(1),
       vocab_size_(QNN_LLAMA3_2_LOGITS),
       max_seq_len_(QNN_LLAMA3_2_SEQLEN),
+      tokenizer_path_(tokenizer_path),
+      temperature_(temperature),
       stats_({}) {
   for (size_t i = 0; i < models_path.size(); ++i) {
     modules_.push_back(std::make_shared<Module>(
@@ -58,7 +58,9 @@ Runner::Runner(
   ET_LOG(Info, "creating runner: tokenizer_path=%s", tokenizer_path_.c_str());
 
   tokenizer_ = example::get_tiktoken_for_llama();
-  tokenizer_->load(tokenizer_path_);
+  Error err = tokenizer_->load(tokenizer_path_);
+  ET_CHECK_MSG(
+      err == Error::Ok, "failed to load tokenizer %s", tokenizer_path_.c_str());
   eos_id_.insert(tokenizer_->encode("<|eot_id|>", 0, 0).get()[0]);
   bos_id_ = tokenizer_->bos_tok();
   eos_id_.insert(tokenizer_->eos_tok());
diff --git a/examples/qualcomm/oss_scripts/llama3_2/targets.bzl b/examples/qualcomm/oss_scripts/llama3_2/targets.bzl
@@ -0,0 +1,53 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_oss_build_kwargs", "runtime")
+load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")
+
+def define_common_targets():
+    runtime.cxx_library(
+        name = "runner_lib",
+        srcs = glob(
+            [
+                "runner/*.cpp",
+            ],
+        ),
+        exported_headers = glob([
+            "runner/*.h",
+        ]),
+        compiler_flags = [
+            "-Wno-global-constructors",
+            "-Wunused-command-line-argument",
+        ],
+        deps = [
+            "//executorch/extension/llm/runner:stats",
+            "//executorch/extension/tensor:tensor",
+            "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
+        ],
+        exported_deps = [
+            "//executorch/extension/module:module",
+            "//executorch/extension/llm/sampler:sampler",
+            "//executorch/examples/models/llama/tokenizer:tiktoken",
+            "//executorch/extension/evalue_util:print_evalue",
+            "//executorch/backends/qualcomm/runtime:runtime",
+        ],
+        external_deps = [
+            "gflags",
+        ],
+        **get_oss_build_kwargs()
+    )
+
+    runtime.cxx_binary(
+        name = "qnn_llama3_2_runner",
+        srcs = [
+            "qnn_llama3_2_runner.cpp",
+        ],
+        compiler_flags = [
+            "-Wno-global-constructors",
+        ],
+        deps = [
+            ":runner_lib",
+            "//executorch/extension/threadpool:threadpool", # This dependency shouldn't be needed, but the build fails without it.
+        ],
+        external_deps = [
+            "gflags",
+        ],
+        **get_oss_build_kwargs()
+    )