Commit 26bbb97

fairydreaming authored and sszymczy committed
llama : add support for lora adapters in T5 model (ggml-org#8938)
Co-authored-by: Stanisław Szymczyk <[email protected]>
1 parent b954191 commit 26bbb97

File tree

1 file changed: +13 −13 lines changed

src/llama.cpp

Lines changed: 13 additions & 13 deletions

Every hunk follows the same pattern: a direct ggml_mul_mat call on one of the T5 projection weights is replaced with the LoRA-aware llm_build_lora_mm wrapper, so that loaded adapters are applied on top of the base weight.
@@ -13171,13 +13171,13 @@ struct llm_build_context {
 
         // self-attention
         {
-            struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq_enc, cur);
+            struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq_enc, cur);
             cb(Qcur, "Qcur", il);
 
-            struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk_enc, cur);
+            struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk_enc, cur);
             cb(Kcur, "Kcur", il);
 
-            struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv_enc, cur);
+            struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv_enc, cur);
             cb(Vcur, "Vcur", il);
 
             Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
@@ -13211,7 +13211,7 @@ struct llm_build_context {
 
             ggml_build_forward_expand(gf, cur);
 
-            cur = ggml_mul_mat(ctx0, model.layers[il].wo_enc, cur);
+            cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wo_enc, cur);
             cb(cur, "kqv_out", il);
         }
 
@@ -13285,13 +13285,13 @@ struct llm_build_context {
 
         // self-attention
        {
-            struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur);
+            struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur);
             cb(Qcur, "Qcur", il);
 
-            struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur);
+            struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur);
             cb(Kcur, "Kcur", il);
 
-            struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur);
+            struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur);
             cb(Vcur, "Vcur", il);
 
             llm_build_kv_store(ctx0, hparams, cparams, kv_self, gf, Kcur, Vcur, n_tokens, kv_head, cb, il);
@@ -13338,7 +13338,7 @@ struct llm_build_context {
 
             ggml_build_forward_expand(gf, cur);
 
-            cur = ggml_mul_mat(ctx0, model.layers[il].wo, cur);
+            cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wo, cur);
             cb(cur, "kqv_out", il);
         }
 
@@ -13355,13 +13355,13 @@ struct llm_build_context {
 
         // cross-attention
         {
-            struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq_cross, cur);
+            struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq_cross, cur);
             cb(Qcur, "Qcur", il);
 
-            struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk_cross, embd_enc);
+            struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk_cross, embd_enc);
             cb(Kcur, "Kcur", il);
 
-            struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv_cross, embd_enc);
+            struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv_cross, embd_enc);
             cb(Vcur, "Vcur", il);
 
             Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
@@ -13390,7 +13390,7 @@ struct llm_build_context {
 
             ggml_build_forward_expand(gf, cur);
 
-            cur = ggml_mul_mat(ctx0, model.layers[il].wo_cross, cur);
+            cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wo_cross, cur);
             cb(cur, "kqv_out", il);
         }
 
@@ -13447,7 +13447,7 @@ struct llm_build_context {
         cb(cur, "result_norm", -1);
 
         // lm_head
-        cur = ggml_mul_mat(ctx0, model.output, cur);
+        cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
         cb(cur, "result_output", -1);
     }
 
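For context, llm_build_lora_mm is the LoRA-aware replacement for ggml_mul_mat that the other architectures in llama.cpp already route their projections through; this commit wires the T5 encoder, decoder, cross-attention and lm_head matmuls through it as well. Conceptually the wrapper computes the base matmul and then, for every loaded adapter that carries tensors for the weight w, adds the low-rank update B·(A·x), scaled by alpha/rank times the adapter's user scale. The sketch below is a minimal reconstruction of that pattern; the adapter bookkeeping (the lora_adapters container, the get_weight lookup and the alpha field) is assumed for illustration, not quoted from the source.

// Sketch of a LoRA-aware matmul: base weight matmul plus a scaled
// low-rank update per active adapter. The adapter bookkeeping names
// below are assumptions for illustration.
static struct ggml_tensor * llm_build_lora_mm(
        struct llama_context & lctx,
        struct ggml_context  * ctx0,
        struct ggml_tensor   * w,
        struct ggml_tensor   * cur) {
    struct ggml_tensor * res = ggml_mul_mat(ctx0, w, cur); // base projection
    for (auto & it : lctx.lora_adapters) {                 // assumed container of (adapter, user scale)
        struct llama_lora_weight * lora = it.first->get_weight(w); // assumed lookup
        if (lora == nullptr) {
            continue; // this adapter has no A/B tensors for weight w
        }
        // effective scale: user scale, optionally multiplied by alpha/rank
        const float alpha = it.first->alpha;
        const float rank  = (float) lora->b->ne[0];
        const float scale = alpha ? it.second * alpha / rank : it.second;
        // low-rank update: B * (A * x)
        struct ggml_tensor * ab_cur = ggml_mul_mat(
            ctx0, lora->b,
            ggml_mul_mat(ctx0, lora->a, cur));
        ab_cur = ggml_scale(ctx0, ab_cur, scale);
        res    = ggml_add(ctx0, res, ab_cur);
    }
    return res;
}

With every T5 matmul routed through this wrapper, an adapter loaded at runtime (for example via the --lora flag of the command-line tools) now also affects the encoder self-attention, the decoder self- and cross-attention, and the final lm_head projection, all of which previously called ggml_mul_mat directly and therefore ignored adapters.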