@@ -2368,13 +2368,21 @@ struct llama_control_vector {
2368
2368
int32_t layer_start = -1;
2369
2369
int32_t layer_end = -1;
2370
2370
2371
- ggml_tensor * tensor_for(int il) const {
2371
+ struct ggml_tensor * tensor_for(int il) const {
2372
2372
if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
2373
2373
return nullptr;
2374
2374
}
2375
2375
return tensors[il];
2376
2376
}
2377
2377
2378
+ struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
2379
+ ggml_tensor * layer_dir = tensor_for(il);
2380
+ if (layer_dir != nullptr) {
2381
+ cur = ggml_add(ctx, cur, layer_dir);
2382
+ }
2383
+ return cur;
2384
+ }
2385
+
2378
2386
~llama_control_vector() {
2379
2387
for (struct ggml_context * ctx : ctxs) {
2380
2388
ggml_free(ctx);
@@ -8023,10 +8031,7 @@ struct llm_build_context {
8023
8031
cur = ggml_add(ctx0, cur, ffn_inp);
8024
8032
cb(cur, "ffn_out", il);
8025
8033
8026
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
8027
- if (layer_dir != nullptr) {
8028
- cur = ggml_add(ctx0, cur, layer_dir);
8029
- }
8034
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8030
8035
cb(cur, "l_out", il);
8031
8036
8032
8037
// input for next layer
@@ -8141,6 +8146,7 @@ struct llm_build_context {
8141
8146
}
8142
8147
8143
8148
cur = ggml_add(ctx0, cur, ffn_inp);
8149
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8144
8150
cb(cur, "l_out", il);
8145
8151
8146
8152
// input for next layer
@@ -8245,6 +8251,7 @@ struct llm_build_context {
8245
8251
}
8246
8252
8247
8253
cur = ggml_add(ctx0, cur, ffn_inp);
8254
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8248
8255
cb(cur, "l_out", il);
8249
8256
8250
8257
// input for next layer
@@ -8360,9 +8367,8 @@ struct llm_build_context {
8360
8367
}
8361
8368
8362
8369
cur = ggml_add(ctx0, cur, ffn_inp);
8363
- cb(cur, "l_out", il);
8364
-
8365
8370
cur = ggml_add(ctx0, cur, inpL);
8371
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8366
8372
cb(cur, "l_out", il);
8367
8373
8368
8374
// input for next layer
@@ -8514,10 +8520,7 @@ struct llm_build_context {
8514
8520
cur = ggml_add(ctx0, cur, ffn_inp);
8515
8521
cb(cur, "ffn_out", il);
8516
8522
8517
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
8518
- if (layer_dir != nullptr) {
8519
- cur = ggml_add(ctx0, cur, layer_dir);
8520
- }
8523
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8521
8524
cb(cur, "l_out", il);
8522
8525
8523
8526
// input for next layer
@@ -8648,10 +8651,7 @@ struct llm_build_context {
8648
8651
cur = ggml_add(ctx0, cur, ffn_inp);
8649
8652
cb(cur, "ffn_out", il);
8650
8653
8651
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
8652
- if (layer_dir != nullptr) {
8653
- cur = ggml_add(ctx0, cur, layer_dir);
8654
- }
8654
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8655
8655
cb(cur, "l_out", il);
8656
8656
8657
8657
// input for next layer
@@ -8757,8 +8757,12 @@ struct llm_build_context {
8757
8757
cb(cur, "ffn_out", il);
8758
8758
}
8759
8759
8760
- inpL = ggml_add(ctx0, cur, ffn_inp);
8761
- cb(inpL, "l_out", il);
8760
+ cur = ggml_add(ctx0, cur, ffn_inp);
8761
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8762
+ cb(cur, "l_out", il);
8763
+
8764
+ // input for next layer
8765
+ inpL = cur;
8762
8766
}
8763
8767
8764
8768
cur = llm_build_norm(ctx0, inpL, hparams,
@@ -8846,6 +8850,7 @@ struct llm_build_context {
8846
8850
}
8847
8851
8848
8852
cur = ggml_add(ctx0, cur, ffn_inp);
8853
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8849
8854
cb(cur, "l_out", il);
8850
8855
8851
8856
// input for next layer
@@ -9141,8 +9146,12 @@ struct llm_build_context {
9141
9146
cb(cur, "ffn_out", il);
9142
9147
}
9143
9148
9144
- inpL = ggml_add(ctx0, cur, ffn_inp);
9145
- cb(inpL, "l_out", il);
9149
+ cur = ggml_add(ctx0, cur, ffn_inp);
9150
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9151
+ cb(cur, "l_out", il);
9152
+
9153
+ // input for next layer
9154
+ inpL = cur;
9146
9155
}
9147
9156
9148
9157
cur = llm_build_norm(ctx0, inpL, hparams,
@@ -9276,6 +9285,7 @@ struct llm_build_context {
9276
9285
}
9277
9286
9278
9287
cur = ggml_add(ctx0, cur, ffn_inp);
9288
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9279
9289
cb(cur, "l_out", il);
9280
9290
9281
9291
// input for next layer
@@ -9424,6 +9434,7 @@ struct llm_build_context {
9424
9434
}
9425
9435
9426
9436
cur = ggml_add(ctx0, cur, ffn_inp);
9437
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9427
9438
cb(cur, "l_out", il);
9428
9439
9429
9440
// input for next layer
@@ -9536,6 +9547,7 @@ struct llm_build_context {
9536
9547
}
9537
9548
9538
9549
cur = ggml_add(ctx0, cur, ffn_inp);
9550
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9539
9551
cb(cur, "l_out", il);
9540
9552
9541
9553
// input for next layer
@@ -9647,6 +9659,7 @@ struct llm_build_context {
9647
9659
cb(cur, "ffn_out", il);
9648
9660
9649
9661
cur = ggml_add(ctx0, cur, ffn_inp);
9662
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9650
9663
cb(cur, "l_out", il);
9651
9664
9652
9665
// input for next layer
@@ -9792,6 +9805,7 @@ struct llm_build_context {
9792
9805
}
9793
9806
9794
9807
cur = ggml_add(ctx0, cur, ffn_inp);
9808
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9795
9809
cb(cur, "l_out", il);
9796
9810
9797
9811
// input for next layer
@@ -9912,11 +9926,11 @@ struct llm_build_context {
9912
9926
}
9913
9927
9914
9928
cur = ggml_add(ctx0, cur, ffn_output);
9915
- cb(cur, "l_out", il);
9916
-
9917
9929
cur = ggml_add(ctx0, cur, inpL);
9930
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9918
9931
cb(cur, "l_out", il);
9919
9932
9933
+ // input for next layer
9920
9934
inpL = cur;
9921
9935
}
9922
9936
@@ -10048,8 +10062,10 @@ struct llm_build_context {
10048
10062
}
10049
10063
10050
10064
cur = ggml_add(ctx0, residual, cur);
10065
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10051
10066
cb(cur, "l_out", il);
10052
10067
10068
+ // input for next layer
10053
10069
inpL = cur;
10054
10070
}
10055
10071
@@ -10148,9 +10164,8 @@ struct llm_build_context {
10148
10164
}
10149
10165
10150
10166
cur = ggml_add(ctx0, cur, sa_out);
10151
- cb(cur, "l_out", il);
10152
-
10153
10167
cur = ggml_add(ctx0, cur, inpL);
10168
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10154
10169
cb(cur, "l_out", il);
10155
10170
10156
10171
// input for next layer
@@ -10256,8 +10271,12 @@ struct llm_build_context {
10256
10271
cb(cur, "ffn_out", il);
10257
10272
}
10258
10273
10259
- inpL = ggml_add(ctx0, cur, ffn_inp);
10260
- cb(inpL, "l_out", il);
10274
+ cur = ggml_add(ctx0, cur, ffn_inp);
10275
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10276
+ cb(cur, "l_out", il);
10277
+
10278
+ // input for next layer
10279
+ inpL = cur;
10261
10280
}
10262
10281
10263
10282
cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10363,8 +10382,12 @@ struct llm_build_context {
10363
10382
cb(cur, "ffn_out", il);
10364
10383
}
10365
10384
10366
- inpL = ggml_add(ctx0, cur, ffn_inp);
10367
- cb(inpL, "l_out", il);
10385
+ cur = ggml_add(ctx0, cur, ffn_inp);
10386
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10387
+ cb(cur, "l_out", il);
10388
+
10389
+ // input for next layer
10390
+ inpL = cur;
10368
10391
}
10369
10392
10370
10393
cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10476,6 +10499,7 @@ struct llm_build_context {
10476
10499
cb(cur, "ffn_out", il);
10477
10500
10478
10501
cur = ggml_add(ctx0, cur, ffn_inp);
10502
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10479
10503
cb(cur, "l_out", il);
10480
10504
10481
10505
// input for next layer
@@ -10593,6 +10617,7 @@ struct llm_build_context {
10593
10617
cb(cur, "ffn_out", il);
10594
10618
10595
10619
cur = ggml_add(ctx0, cur, ffn_inp);
10620
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10596
10621
cb(cur, "l_out", il);
10597
10622
10598
10623
// input for next layer
@@ -10734,6 +10759,7 @@ struct llm_build_context {
10734
10759
cb(cur, "hidden_scaled_ffn", -1);
10735
10760
10736
10761
cur = ggml_add(ctx0, cur, ffn_inp);
10762
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10737
10763
cb(cur, "l_out", il);
10738
10764
10739
10765
// input for next layer
@@ -10846,6 +10872,7 @@ struct llm_build_context {
10846
10872
}
10847
10873
10848
10874
cur = ggml_add(ctx0, cur, sa_out);
10875
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10849
10876
cb(cur, "l_out", il);
10850
10877
10851
10878
// input for next layer
@@ -10962,7 +10989,9 @@ struct llm_build_context {
10962
10989
NULL,
10963
10990
LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
10964
10991
cb(cur, "ffn_out", il);
10992
+
10965
10993
cur = ggml_add(ctx0, cur, ffn_inp);
10994
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10966
10995
cb(cur, "l_out", il);
10967
10996
10968
10997
// input for next layer
@@ -11111,6 +11140,7 @@ struct llm_build_context {
11111
11140
11112
11141
// residual
11113
11142
cur = ggml_add(ctx0, cur, inpL);
11143
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11114
11144
cb(cur, "l_out", il);
11115
11145
11116
11146
// input for next layer
@@ -11252,6 +11282,7 @@ struct llm_build_context {
11252
11282
// add together residual + FFN + self-attention
11253
11283
cur = ggml_add(ctx0, cur, inpL);
11254
11284
cur = ggml_add(ctx0, cur, attn_out);
11285
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11255
11286
cb(cur, "l_out", il);
11256
11287
11257
11288
// input for next layer
@@ -11387,10 +11418,7 @@ struct llm_build_context {
11387
11418
cur = ggml_add(ctx0, cur, ffn_inp);
11388
11419
cb(cur, "ffn_out", il);
11389
11420
11390
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
11391
- if (layer_dir != nullptr) {
11392
- cur = ggml_add(ctx0, cur, layer_dir);
11393
- }
11421
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11394
11422
cb(cur, "l_out", il);
11395
11423
11396
11424
// input for next layer
@@ -11504,8 +11532,12 @@ struct llm_build_context {
11504
11532
cur = ggml_add(ctx0, cur, inpL);
11505
11533
cb(cur, "ffn_out", il);
11506
11534
11507
- inpL = ggml_add(ctx0, cur, attn_out);
11508
- cb(inpL, "l_out", il);
11535
+ cur = ggml_add(ctx0, cur, attn_out);
11536
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11537
+ cb(cur, "l_out", il);
11538
+
11539
+ // input for next layer
11540
+ inpL = cur;
11509
11541
} else {
11510
11542
// attention and ffn are computed sequentially
11511
11543
// x = x + attn(ln1(x))
@@ -11528,8 +11560,12 @@ struct llm_build_context {
11528
11560
LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
11529
11561
cb(cur, "ffn_out", il);
11530
11562
11531
- inpL = ggml_add(ctx0, cur, ffn_inp);
11532
- cb(inpL, "l_out", il);
11563
+ cur = ggml_add(ctx0, cur, ffn_inp);
11564
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11565
+ cb(cur, "l_out", il);
11566
+
11567
+ // input for next layer
11568
+ inpL = cur;
11533
11569
}
11534
11570
}
11535
11571
@@ -11656,10 +11692,7 @@ struct llm_build_context {
11656
11692
cur = ggml_add(ctx0, cur, ffn_out);
11657
11693
cb(cur, "ffn_out", il);
11658
11694
11659
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
11660
- if (layer_dir != nullptr) {
11661
- cur = ggml_add(ctx0, cur, layer_dir);
11662
- }
11695
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11663
11696
cb(cur, "l_out", il);
11664
11697
11665
11698
// input for next layer
@@ -11892,6 +11925,7 @@ struct llm_build_context {
11892
11925
}
11893
11926
11894
11927
cur = ggml_add(ctx0, cur, ffn_inp);
11928
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11895
11929
cb(cur, "l_out", il);
11896
11930
11897
11931
// input for next layer
0 commit comments