@@ -13171,13 +13171,13 @@ struct llm_build_context {
 
     // self-attention
     {
-        struct ggml_tensor * Qcur = ggml_mul_mat( ctx0, model.layers[il].wq_enc, cur);
+        struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq_enc, cur);
         cb(Qcur, "Qcur", il);
 
-        struct ggml_tensor * Kcur = ggml_mul_mat( ctx0, model.layers[il].wk_enc, cur);
+        struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk_enc, cur);
         cb(Kcur, "Kcur", il);
 
-        struct ggml_tensor * Vcur = ggml_mul_mat( ctx0, model.layers[il].wv_enc, cur);
+        struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv_enc, cur);
         cb(Vcur, "Vcur", il);
 
         Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
@@ -13211,7 +13211,7 @@ struct llm_build_context {
 
         ggml_build_forward_expand(gf, cur);
 
-        cur = ggml_mul_mat( ctx0, model.layers[il].wo_enc, cur);
+        cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wo_enc, cur);
         cb(cur, "kqv_out", il);
     }
 
@@ -13285,13 +13285,13 @@ struct llm_build_context {
 
     // self-attention
    {
-        struct ggml_tensor * Qcur = ggml_mul_mat( ctx0, model.layers[il].wq, cur);
+        struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur);
         cb(Qcur, "Qcur", il);
 
-        struct ggml_tensor * Kcur = ggml_mul_mat( ctx0, model.layers[il].wk, cur);
+        struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur);
         cb(Kcur, "Kcur", il);
 
-        struct ggml_tensor * Vcur = ggml_mul_mat( ctx0, model.layers[il].wv, cur);
+        struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur);
         cb(Vcur, "Vcur", il);
 
         llm_build_kv_store(ctx0, hparams, cparams, kv_self, gf, Kcur, Vcur, n_tokens, kv_head, cb, il);
@@ -13338,7 +13338,7 @@ struct llm_build_context {
 
         ggml_build_forward_expand(gf, cur);
 
-        cur = ggml_mul_mat( ctx0, model.layers[il].wo, cur);
+        cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wo, cur);
         cb(cur, "kqv_out", il);
     }
 
@@ -13355,13 +13355,13 @@ struct llm_build_context {
 
     // cross-attention
    {
-        struct ggml_tensor * Qcur = ggml_mul_mat( ctx0, model.layers[il].wq_cross, cur);
+        struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq_cross, cur);
         cb(Qcur, "Qcur", il);
 
-        struct ggml_tensor * Kcur = ggml_mul_mat( ctx0, model.layers[il].wk_cross, embd_enc);
+        struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk_cross, embd_enc);
         cb(Kcur, "Kcur", il);
 
-        struct ggml_tensor * Vcur = ggml_mul_mat( ctx0, model.layers[il].wv_cross, embd_enc);
+        struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv_cross, embd_enc);
         cb(Vcur, "Vcur", il);
 
         Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
@@ -13390,7 +13390,7 @@ struct llm_build_context {
 
         ggml_build_forward_expand(gf, cur);
 
-        cur = ggml_mul_mat( ctx0, model.layers[il].wo_cross, cur);
+        cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wo_cross, cur);
         cb(cur, "kqv_out", il);
     }
 
@@ -13447,7 +13447,7 @@ struct llm_build_context {
     cb(cur, "result_norm", -1);
 
     // lm_head
-    cur = ggml_mul_mat( ctx0, model.output, cur);
+    cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
     cb(cur, "result_output", -1);
 }
 
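Every hunk above makes the same substitution: matrix multiplications against model weights no longer call ggml_mul_mat directly but go through llm_build_lora_mm, which computes the same base product and then adds the contribution of any LoRA adapters attached to the context. The C++ sketch below shows the general shape of such a helper; the adapter bookkeeping (the lctx.lora_adapters container, the llama_lora_weight lookup, and the alpha/rank scaling) is assumed for illustration and is not shown in this diff.

// Sketch only: base matmul plus per-adapter low-rank update.
// The adapter-related names below are assumptions, not taken from this diff.
static struct ggml_tensor * llm_build_lora_mm(
        struct llama_context & lctx,
         struct ggml_context * ctx0,
          struct ggml_tensor * w,
          struct ggml_tensor * cur) {
    // base result, identical to the old call sites: w @ cur
    struct ggml_tensor * res = ggml_mul_mat(ctx0, w, cur);

    // assumed registry of active adapters mapped to their user-set scales
    for (auto & it : lctx.lora_adapters) {
        // assumed lookup of the A/B pair targeting this particular weight
        struct llama_lora_weight * lora = it.first->get_weight(w);
        if (lora == nullptr) {
            continue; // this adapter does not touch w
        }

        // conventional LoRA scaling: user_scale * alpha / rank
        const float alpha = it.first->alpha;
        const float rank  = (float) lora->b->ne[0];
        const float scale = alpha ? it.second * alpha / rank : it.second;

        // low-rank update B(A(cur)), scaled and accumulated onto the base product
        struct ggml_tensor * ab_cur = ggml_mul_mat(ctx0, lora->b,
                                      ggml_mul_mat(ctx0, lora->a, cur));
        ab_cur = ggml_scale(ctx0, ab_cur, scale);
        res    = ggml_add(ctx0, res, ab_cur);
    }

    return res;
}

With the T5 call sites routed through a helper like this, loading or removing an adapter only changes what the loop accumulates; when no adapter is active the helper reduces to the original ggml_mul_mat, so the graphs built above behave exactly as before.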