Android JNI llama: cache temperature as a class member

kirklandsign · kirklandsign · commit e26af08b6649 · 2025-04-17T16:37:19.000-07:00
diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp
@@ -114,6 +114,7 @@ class ExecuTorchLlmCallbackJni
 class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
  private:
   friend HybridBase;
+  float temperature_;
   int model_type_category_;
   std::unique_ptr<llm::IRunner> runner_;
   std::unique_ptr<llm::MultimodalRunner> multi_modal_runner_;
@@ -149,7 +150,7 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
       facebook::jni::alias_ref<jstring> data_path = nullptr) {
 #if defined(ET_USE_THREADPOOL)
     // Reserve 1 thread for the main thread.
-    uint32_t num_performant_cores =
+    int32_t num_performant_cores =
         ::executorch::extension::cpuinfo::get_num_performant_cores() - 1;
     if (num_performant_cores > 0) {
       ET_LOG(Info, "Resetting threadpool to %d threads", num_performant_cores);
@@ -169,20 +170,17 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
         runner_ = std::make_unique<example::Runner>(
             model_path->toStdString().c_str(),
             tokenizer_path->toStdString().c_str(),
-            temperature,
             data_path->toStdString().c_str());
       } else {
         runner_ = std::make_unique<example::Runner>(
             model_path->toStdString().c_str(),
-            tokenizer_path->toStdString().c_str(),
-            temperature);
+            tokenizer_path->toStdString().c_str());
       }
 #if defined(EXECUTORCH_BUILD_MEDIATEK)
     } else if (model_type_category == MODEL_TYPE_MEDIATEK_LLAMA) {
       runner_ = std::make_unique<MTKLlamaRunner>(
           model_path->toStdString().c_str(),
-          tokenizer_path->toStdString().c_str(),
-          temperature);
+          tokenizer_path->toStdString().c_str());
       // Interpret the model type as LLM
       model_type_category_ = MODEL_TYPE_CATEGORY_LLM;
 #endif
@@ -222,6 +220,7 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
       executorch::extension::llm::GenerationConfig config{
           .echo = static_cast<bool>(echo),
           .seq_len = seq_len,
+          .temperature = temperature_,
       };
       runner_->generate(
           prompt->toStdString(),