Update llama.cpp to use instruct mode

thomasantony · thomasantony · commit ce3dd8b0b4b0 · 2023-03-19T13:00:24.000-07:00
diff --git a/llama.cpp b/llama.cpp
@@ -1176,6 +1176,18 @@ void llama_update_input(llama_context& ctx, const std::string& text)
     state.embd_inp.insert(state.embd_inp.end(), line_inp.begin(), line_inp.end());
     state.remaining_tokens -= line_inp.size();
 }
+/// @brief Appends already-tokenized input to the context's pending input (token overload)
+/// @param ctx Context whose state (`ctx.state`) receives the tokens
+/// @param tokens Token ids appended, in order, to the end of `state.embd_inp`
+/// @note Decrements `state.remaining_tokens` by `tokens.size()`
+void llama_update_input(llama_context& ctx, const std::vector<gpt_vocab::id>& tokens)
+{
+    llama_state& state = *ctx.state;
+
+    // The ids are queued exactly as given; this overload performs no tokenization.
+    state.embd_inp.insert(state.embd_inp.end(), tokens.begin(), tokens.end());
+    state.remaining_tokens -= tokens.size();
+}
 
 /// @brief  Ingests a batch of input tokens into the context
 /// @param ctx 
diff --git a/llama.h b/llama.h
@@ -50,6 +50,8 @@ const std::vector<gpt_vocab::id> llama_tokenize_text(const llama_context& ctx, c
 void llama_add_bos(llama_context& ctx);
 // Queues up input text to the model input
 void llama_update_input(llama_context& ctx, const std::string& text);
+// Queues up already-tokenized input (token ids) to the model input
+void llama_update_input(llama_context& ctx, const std::vector<gpt_vocab::id>& tokens);
 // Ingests input previously added using llama_update_input()
 void llama_ingest_input_batch(llama_context& ctx);
 // Ingests all input previously added using llama_update_input() in multiple batches