context : move adapter code into the implementation [no ci]

ggerganov · ggerganov · commit 71d116958764 · 2025-01-20T09:22:04.000+02:00
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
@@ -1788,6 +1788,43 @@ float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id
     return it->second.data();
 }
 
+// llama adapter API
+
+int32_t llama_set_adapter_lora( // record the scale to use for `adapter` on this context
+            struct llama_context * ctx,
+            struct llama_adapter_lora * adapter,
+            float scale) {
+    ctx->loras[adapter] = scale; // ctx->loras is keyed by the adapter pointer; the stored value is its scale
+    return 0; // no failure path here: the function always reports 0
+}
+
+int32_t llama_rm_adapter_lora( // drop `adapter` from this context's ctx->loras
+            struct llama_context * ctx,
+            struct llama_adapter_lora * adapter) {
+    auto pos = ctx->loras.find(adapter); // look the adapter up by pointer
+    if (pos != ctx->loras.end()) {
+        ctx->loras.erase(pos); // only the ctx->loras entry is removed; nothing is done to the adapter object here
+        return 0; // 0: the adapter was registered and has been removed
+    }
+
+    return -1; // -1: the adapter was not registered on this context
+}
+
+void llama_clear_adapter_lora(struct llama_context * ctx) { // remove every entry from ctx->loras
+    ctx->loras.clear();
+}
+
+int32_t llama_apply_adapter_cvec( // thin wrapper: forwards all arguments to ctx->cvec.apply
+        struct llama_context * ctx,
+                 const float * data,
+                      size_t   len,
+                     int32_t   n_embd,
+                     int32_t   il_start,
+                     int32_t   il_end) {
+    return ctx->cvec.apply(ctx->model, data, len, n_embd, il_start, il_end); // result is passed through unchanged; its meaning is defined by cvec.apply (not visible here)
+}
+
+
 // llama state API
 
 // deprecated
diff --git a/src/llama.cpp b/src/llama.cpp
@@ -8322,40 +8322,6 @@ static int llama_encode_impl(
     return 0;
 }
 
-int32_t llama_set_adapter_lora(
-            struct llama_context * ctx,
-            struct llama_adapter_lora * adapter,
-            float scale) {
-    ctx->loras[adapter] = scale;
-    return 0;
-}
-
-int32_t llama_rm_adapter_lora(
-            struct llama_context * ctx,
-            struct llama_adapter_lora * adapter) {
-    auto pos = ctx->loras.find(adapter);
-    if (pos != ctx->loras.end()) {
-        ctx->loras.erase(pos);
-        return 0;
-    }
-
-    return -1;
-}
-
-void llama_clear_adapter_lora(struct llama_context * ctx) {
-    ctx->loras.clear();
-}
-
-int32_t llama_apply_adapter_cvec(
-        struct llama_context * ctx,
-                 const float * data,
-                      size_t   len,
-                     int32_t   n_embd,
-                     int32_t   il_start,
-                     int32_t   il_end) {
-    return ctx->cvec.apply(ctx->model, data, len, n_embd, il_start, il_end);
-}
-
 //
 // interface implementation
 //
@@ -8914,7 +8880,7 @@ struct llama_context * llama_new_context_with_model(
 }
 
 //
-// kv cache
+// kv cache view
 //
 
 struct llama_kv_cache_view llama_kv_cache_view_init(const llama_context * ctx, int32_t n_seq_max) {
@@ -8925,6 +8891,10 @@ void llama_kv_cache_view_update(const llama_context * ctx, llama_kv_cache_view *
     llama_kv_cache_view_update(view, ctx->kv_self);
 }
 
+//
+// kv cache
+//
+
 // deprecated
 int32_t llama_get_kv_cache_token_count(const llama_context * ctx) {
     return llama_kv_self_n_tokens(ctx);