
Commit 163d50a

fixes #7999: adds control vectors to all build_XXX() functions in llama.cpp [needs testing] (#8060)

* fixes #7999: `build_command_r` forgot to add the control vector.
* Fixes qwen2 too.
* Fixed all models' control vectors.
* Removed double calls to `cb(cur, "l_out", il)`.
* Moved control vector logic to `llama_control_vector::apply_to()`.
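For reference, the shape of the change: every `build_XXX()` graph builder used to end its layer loop with an inline null-check-and-add of that layer's control vector (and several models omitted it entirely); the commit moves that logic into a single `llama_control_vector::apply_to()` helper. A minimal self-contained sketch of the consolidated struct, with the ggml types reduced to bare declarations so it stands alone (the member and function names are the ones in the diff below):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // opaque ggml types, declared only so this sketch stands alone
    struct ggml_context;
    struct ggml_tensor;
    struct ggml_tensor * ggml_add(struct ggml_context *, struct ggml_tensor *, struct ggml_tensor *);

    struct llama_control_vector {
        std::vector<struct ggml_tensor *> tensors; // one direction tensor per layer
        int32_t layer_start = -1;
        int32_t layer_end   = -1;

        struct ggml_tensor * tensor_for(int il) const {
            if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
                return nullptr;
            }
            return tensors[il];
        }

        // the new helper: each build_XXX() now ends its layer loop with
        //     cur = lctx.cvec.apply_to(ctx0, cur, il);
        // instead of repeating the null-check-and-add by hand
        struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
            struct ggml_tensor * layer_dir = tensor_for(il);
            if (layer_dir != nullptr) {
                cur = ggml_add(ctx, cur, layer_dir);
            }
            return cur;
        }
    };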
1 parent 6fcbf68 commit 163d50a

File tree

1 file changed: +73 −39 lines changed

llama.cpp

Lines changed: 73 additions & 39 deletions
@@ -2368,13 +2368,21 @@ struct llama_control_vector {
     int32_t layer_start = -1;
     int32_t layer_end   = -1;
 
-    ggml_tensor * tensor_for(int il) const {
+    struct ggml_tensor * tensor_for(int il) const {
         if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
             return nullptr;
         }
         return tensors[il];
     }
 
+    struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
+        ggml_tensor * layer_dir = tensor_for(il);
+        if (layer_dir != nullptr) {
+            cur = ggml_add(ctx, cur, layer_dir);
+        }
+        return cur;
+    }
+
     ~llama_control_vector() {
         for (struct ggml_context * ctx : ctxs) {
             ggml_free(ctx);
@@ -8023,10 +8031,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8141,6 +8146,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8245,6 +8251,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8360,9 +8367,8 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8514,10 +8520,7 @@
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8648,10 +8651,7 @@
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8757,8 +8757,12 @@
                 cb(cur, "ffn_out", il);
             }
 
-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }
 
         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -8846,6 +8850,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9141,8 +9146,12 @@
                 cb(cur, "ffn_out", il);
             }
 
-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }
 
         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -9276,6 +9285,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9424,6 +9434,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9536,6 +9547,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9647,6 +9659,7 @@
             cb(cur, "ffn_out", il);
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9792,6 +9805,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9912,11 +9926,11 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_output);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
+            // input for next layer
             inpL = cur;
         }
 
@@ -10048,8 +10062,10 @@
             }
 
             cur = ggml_add(ctx0, residual, cur);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
+            // input for next layer
             inpL = cur;
         }
 
@@ -10148,9 +10164,8 @@
             }
 
             cur = ggml_add(ctx0, cur, sa_out);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10256,8 +10271,12 @@
                 cb(cur, "ffn_out", il);
             }
 
-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }
 
         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10363,8 +10382,12 @@
                 cb(cur, "ffn_out", il);
             }
 
-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }
 
         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10476,6 +10499,7 @@
             cb(cur, "ffn_out", il);
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10593,6 +10617,7 @@
             cb(cur, "ffn_out", il);
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10734,6 +10759,7 @@
             cb(cur, "hidden_scaled_ffn", -1);
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10846,6 +10872,7 @@
             }
 
             cur = ggml_add(ctx0, cur, sa_out);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10962,7 +10989,9 @@
                     NULL,
                     LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
             cb(cur, "ffn_out", il);
+
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11111,6 +11140,7 @@
 
             // residual
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11252,6 +11282,7 @@
             // add together residual + FFN + self-attention
             cur = ggml_add(ctx0, cur, inpL);
             cur = ggml_add(ctx0, cur, attn_out);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11387,10 +11418,7 @@
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11504,8 +11532,12 @@
                 cur = ggml_add(ctx0, cur, inpL);
                 cb(cur, "ffn_out", il);
 
-                inpL = ggml_add(ctx0, cur, attn_out);
-                cb(inpL, "l_out", il);
+                cur = ggml_add(ctx0, cur, attn_out);
+                cur = lctx.cvec.apply_to(ctx0, cur, il);
+                cb(cur, "l_out", il);
+
+                // input for next layer
+                inpL = cur;
             } else {
                 // attention and ffn are computed sequentially
                 // x = x + attn(ln1(x))
@@ -11528,8 +11560,12 @@
                         LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
                 cb(cur, "ffn_out", il);
 
-                inpL = ggml_add(ctx0, cur, ffn_inp);
-                cb(inpL, "l_out", il);
+                cur = ggml_add(ctx0, cur, ffn_inp);
+                cur = lctx.cvec.apply_to(ctx0, cur, il);
+                cb(cur, "l_out", il);
+
+                // input for next layer
+                inpL = cur;
             }
         }
 
@@ -11656,10 +11692,7 @@
             cur = ggml_add(ctx0, cur, ffn_out);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11892,6 +11925,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
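For completeness, the per-layer tensors that `apply_to()` reads are populated through the public API in llama.h, not inside these builders. A hedged usage sketch, assuming the `llama_control_vector_apply()` signature from llama.h of this era (verify against your checkout; `apply_steering` and the data-layout comment are illustrative, not part of this commit):

    #include <cstdio>
    #include <vector>
    #include "llama.h"

    // Hedged sketch: steer all layers of a loaded model with one control vector.
    // `data` is assumed to hold n_embd floats per layer, starting at layer 1,
    // which is the layout llama_control_vector_apply() documents in this era's llama.h.
    static bool apply_steering(llama_context * ctx, const llama_model * model,
                               const std::vector<float> & data) {
        const int32_t n_embd  = llama_n_embd(model);
        const int32_t n_layer = (int32_t) data.size() / n_embd;
        const int32_t err = llama_control_vector_apply(
                ctx, data.data(), data.size(),
                n_embd,
                /*il_start=*/1,        // first steered layer (inclusive)
                /*il_end=*/n_layer);   // last steered layer (inclusive)
        if (err != 0) {
            fprintf(stderr, "failed to apply control vector\n");
            return false;
        }
        return true;
    }

Once applied, every graph builder picks the vector up automatically through the `lctx.cvec.apply_to(ctx0, cur, il)` call added above, which is the point of this commit: no model-specific wiring is needed any more.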
