@@ -13780,14 +13780,14 @@ struct llm_build_context {
13780
13780
cb(Qcur, "Qcur", il);
13781
13781
}
13782
13782
13783
- struct ggml_tensor * Kcur = ggml_mul_mat( ctx0, model.layers[il].wk, cur);
13783
+ struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur);
13784
13784
cb(Kcur, "Kcur", il);
13785
13785
if (model.layers[il].bk) {
13786
13786
Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
13787
13787
cb(Kcur, "Kcur", il);
13788
13788
}
13789
13789
13790
- struct ggml_tensor * Vcur = ggml_mul_mat( ctx0, model.layers[il].wv, cur);
13790
+ struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur);
13791
13791
cb(Vcur, "Vcur", il);
13792
13792
if (model.layers[il].bv) {
13793
13793
Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
@@ -13856,7 +13856,7 @@ struct llm_build_context {
13856
13856
cb(cur, "result_norm", -1);
13857
13857
13858
13858
// lm_head
13859
- cur = ggml_mul_mat( ctx0, model.output, cur);
13859
+ cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
13860
13860
cb(cur, "result_output", -1);
13861
13861
13862
13862
ggml_build_forward_expand(gf, cur);
0 commit comments