@@ -381,7 +381,7 @@ void randomize_model(struct llama_model * model, int seed, float mean, float std
     randomize_tensor_normal(model->tok_embeddings, model->tok_embeddings->n_dims, model->tok_embeddings->ne, &rnd);
     randomize_tensor_normal(model->norm,           model->norm->n_dims,           model->norm->ne,           &rnd);
     randomize_tensor_normal(model->output,         model->output->n_dims,         model->output->ne,         &rnd);
-
+
     for (uint32_t i = 0; i < n_layer; ++i) {
         auto & layer = model->layers[i];
         randomize_tensor_normal(layer.attention_norm, layer.attention_norm->n_dims, layer.attention_norm->ne, &rnd);
@@ -415,7 +415,7 @@ void randomize_model_lora(struct llama_model_lora * model, int seed, float mean,
     randomize_tensor_normal(model->norm,    model->norm->n_dims,    model->norm->ne,    &rnd);
     randomize_tensor_normal(model->outputa, model->outputa->n_dims, model->outputa->ne, &rnd);
     randomize_tensor_normal(model->outputb, model->outputb->n_dims, model->outputb->ne, &rnd);
-
+
     for (uint32_t i = 0; i < n_layer; ++i) {
         auto & layer = model->layers[i];
         randomize_tensor_normal(layer.attention_norm, layer.attention_norm->n_dims, layer.attention_norm->ne, &rnd);
@@ -508,14 +508,14 @@ bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora *
 }

 struct ggml_tensor * forward(
-        struct llama_model * model,
-        struct llama_kv_cache * cache,
+        struct llama_model    * model,
+        struct llama_kv_cache * cache,
         struct ggml_context   * ctx0,
         struct ggml_cgraph    * gf,
         struct ggml_tensor    * tokens_input,
         const  int              n_tokens,
         const  int              n_past) {
-
+
     const int N = n_tokens;

     struct llama_kv_cache & kv_self = *cache;
@@ -569,11 +569,11 @@ struct ggml_tensor * forward(
         // Vcur shape [n_embd, N, 1, 1]
         struct ggml_tensor * Vcur = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_reshape_2d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wv, cur), n_embd, N)));

-        // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
-        // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
+        // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
+        // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
         // k shape [n_embd * N, 1]   == kv_self.k[:,n_past:n_past+N,il,0]
         // v shape [N, n_embd, 1, 1] == kv_self.v[:,n_past:n_past+N,il,0]
-
+
         /* {
             struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past));
             struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd,
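Note: the commented-out view code in this hunk is what pins down the flat KV-cache layout described by the comments above it. A minimal sketch of the offset arithmetic it relies on, assuming the [n_embd * n_ctx * n_layer] layout (the helper name is illustrative, not part of the example):

    // Byte offset of layer il, token position n_past, in the flat cache:
    // skip il full layers of n_ctx tokens, then n_past tokens, with
    // n_embd elements per token. Mirrors the ggml_view_1d offset above.
    static size_t kv_offset(size_t elem_size, int n_embd, int n_ctx, int il, int n_past) {
        return elem_size * (size_t) n_embd * ((size_t) il * n_ctx + n_past);
    }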
@@ -597,7 +597,7 @@ struct ggml_tensor * forward(
                         Qcur,
                         0, 2, 1, 3);

-        // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
+        // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
         // K shape [n_embd/n_head, n_past + N, n_head, 1]
         struct ggml_tensor * K =
             ggml_permute(ctx0,
@@ -641,7 +641,7 @@ struct ggml_tensor * forward(
         // KQV_merged = KQV.permute(0, 2, 1, 3)
         // KQV_merged shape [n_embd/n_head, n_head, N, 1]
         struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
-        // KQV_merged shape
+        // KQV_merged shape

         // cur = KQV_merged.contiguous().view(n_embd, N)
         // cur shape [n_embd,N,1,1]
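For orientation, the shape bookkeeping around this hunk, read off the comments in the surrounding code: KQV has shape [n_embd/n_head, N, n_head, 1]; ggml_permute(ctx0, KQV, 0, 2, 1, 3) swaps the second and third dimensions to give KQV_merged shape [n_embd/n_head, n_head, N, 1]; a contiguous copy viewed as 2-D then yields cur with shape [n_embd, N, 1, 1], since n_embd = (n_embd/n_head) * n_head.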
@@ -734,14 +734,14 @@ struct ggml_tensor * forward(


 struct ggml_tensor * forward_lora(
-        struct llama_model_lora * model,
-        struct llama_kv_cache * cache,
+        struct llama_model_lora * model,
+        struct llama_kv_cache   * cache,
         struct ggml_context     * ctx0,
         struct ggml_cgraph      * gf,
         struct ggml_tensor      * tokens_input,
         const  int                n_tokens,
         const  int                n_past) {
-
+
     const int N = n_tokens;

     struct llama_kv_cache & kv_self = *cache;
@@ -784,45 +784,45 @@ struct ggml_tensor * forward_lora(
         // wk   shape [n_embd, n_embd, 1, 1]
         // Qcur shape [n_embd/n_head, n_head, N, 1]
         // Kcur shape [n_embd/n_head, n_head, N, 1]
-        struct ggml_tensor * Qcur = ggml_rope(ctx0,
-                                        ggml_reshape_3d(ctx0,
-                                            ggml_mul_mat(ctx0,
-                                                model->layers[il].wqa,
-                                                ggml_mul_mat(ctx0,
-                                                    model->layers[il].wqb,
-                                                    cur)),
-                                            n_embd/n_head, n_head, N),
+        struct ggml_tensor * Qcur = ggml_rope(ctx0,
+                                        ggml_reshape_3d(ctx0,
+                                            ggml_mul_mat(ctx0,
+                                                model->layers[il].wqa,
+                                                ggml_mul_mat(ctx0,
+                                                    model->layers[il].wqb,
+                                                    cur)),
+                                            n_embd/n_head, n_head, N),
                     n_past, n_rot, 0);
-        struct ggml_tensor * Kcur = ggml_rope(ctx0,
-                                        ggml_reshape_3d(ctx0,
-                                            ggml_mul_mat(ctx0,
-                                                model->layers[il].wka,
-                                                ggml_mul_mat(ctx0,
-                                                    model->layers[il].wkb,
-                                                    cur)),
-                                            n_embd/n_head, n_head, N),
+        struct ggml_tensor * Kcur = ggml_rope(ctx0,
+                                        ggml_reshape_3d(ctx0,
+                                            ggml_mul_mat(ctx0,
+                                                model->layers[il].wka,
+                                                ggml_mul_mat(ctx0,
+                                                    model->layers[il].wkb,
+                                                    cur)),
+                                            n_embd/n_head, n_head, N),
                     n_past, n_rot, 0);

         // store key and value to memory
         {
             // compute the transposed [N, n_embd] V matrix
             // wv shape [n_embd, n_embd, 1, 1]
             // Vcur shape [n_embd, N, 1, 1]
-            struct ggml_tensor * Vcur = ggml_cont(ctx0,
-                                            ggml_transpose(ctx0,
-                                                ggml_reshape_2d(ctx0,
-                                                    ggml_mul_mat(ctx0,
-                                                        model->layers[il].wva,
-                                                        ggml_mul_mat(ctx0,
-                                                            model->layers[il].wvb,
-                                                            cur)),
+            struct ggml_tensor * Vcur = ggml_cont(ctx0,
+                                            ggml_transpose(ctx0,
+                                                ggml_reshape_2d(ctx0,
+                                                    ggml_mul_mat(ctx0,
+                                                        model->layers[il].wva,
+                                                        ggml_mul_mat(ctx0,
+                                                            model->layers[il].wvb,
+                                                            cur)),
                                                     n_embd, N)));

-            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
-            // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
+            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
+            // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
             // k shape [n_embd * N, 1]   == kv_self.k[:,n_past:n_past+N,il,0]
             // v shape [N, n_embd, 1, 1] == kv_self.v[:,n_past:n_past+N,il,0]
-
+
             /* {
                 struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past));
                 struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd,
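The wqa/wqb, wka/wkb, and wva/wvb pairs reformatted in this hunk are the low-rank (LoRA-style) factorization that distinguishes forward_lora from forward: each square projection W is replaced by a product of two smaller matrices, applied as A·(B·x) so the full [n_embd, n_embd] matrix is never materialized. A minimal sketch of that pattern, using only the composition visible above (the helper is illustrative and not part of the example):

    // y = wa * (wb * x): project down through wb, then back up through wa.
    // With rank r much smaller than n_embd this costs O(n_embd * r) per
    // token instead of O(n_embd^2) for the full projection.
    struct ggml_tensor * lora_mul_mat(
            struct ggml_context * ctx,
            struct ggml_tensor  * wa,
            struct ggml_tensor  * wb,
            struct ggml_tensor  * x) {
        return ggml_mul_mat(ctx, wa, ggml_mul_mat(ctx, wb, x));
    }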
@@ -846,7 +846,7 @@ struct ggml_tensor * forward_lora(
                         Qcur,
                         0, 2, 1, 3);

-        // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
+        // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
         // K shape [n_embd/n_head, n_past + N, n_head, 1]
         struct ggml_tensor * K =
             ggml_permute(ctx0,
@@ -890,7 +890,7 @@ struct ggml_tensor * forward_lora(
         // KQV_merged = KQV.permute(0, 2, 1, 3)
         // KQV_merged shape [n_embd/n_head, n_head, N, 1]
         struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
-        // KQV_merged shape
+        // KQV_merged shape

         // cur = KQV_merged.contiguous().view(n_embd, N)
         // cur shape [n_embd,N,1,1]
@@ -974,10 +974,10 @@ struct ggml_tensor * forward_lora(

     // lm_head
     // inpL shape [n_vocab,N,1,1]
-    inpL = ggml_mul_mat(ctx0,
-               model->outputa,
-               ggml_mul_mat(ctx0,
-                   model->outputb,
+    inpL = ggml_mul_mat(ctx0,
+               model->outputa,
+               ggml_mul_mat(ctx0,
+                   model->outputb,
                        inpL));

     // ggml_set_scratch(ctx0, { 0, 0, nullptr, });
@@ -1094,12 +1094,12 @@ struct ggml_tensor * square_error_loss(struct ggml_context * ctx, struct ggml_te

 struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
     const float eps = 1e-3;
-    return
-        ggml_sum(ctx,
-            ggml_neg(ctx,
-                ggml_sum_rows(ctx,
-                    ggml_mul(ctx,
-                        ggml_soft_max(ctx, a),
+    return
+        ggml_sum(ctx,
+            ggml_neg(ctx,
+                ggml_sum_rows(ctx,
+                    ggml_mul(ctx,
+                        ggml_soft_max(ctx, a),
                         ggml_log(ctx,
                             ggml_add1(ctx,
                                 ggml_soft_max(ctx, b),
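Spelled out, the call chain in cross_entropy_loss computes, with a holding the target logits and b the predicted logits (matching the call sites later in the file), and eps guarding the log against zero probabilities:

    loss = -sum_i sum_v softmax(a)[i,v] * log(softmax(b)[i,v] + eps)

ggml_mul is the elementwise product, ggml_sum_rows reduces over the vocabulary dimension, and ggml_sum collapses the per-token losses into the scalar that ggml_opt minimizes.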
@@ -1169,7 +1169,7 @@ int main(int argc, char ** argv) {
     */

     // key + value cache for the self attention
-    struct llama_kv_cache kv_self;
+    struct llama_kv_cache kv_self;
     printf("init_kv_cache\n");
     kv_self.ctx = model.ctx;
     init_kv_cache(&kv_self, &model);
@@ -1221,17 +1221,17 @@ int main(int argc, char ** argv) {
         struct ggml_tensor * logits2 = forward(&model, &kv_self, ctx0, &gf, tokens_input2, n_tokens, n_past);
         // struct ggml_tensor * logits3 = forward(&model, &kv_self, ctx0, &gf, tokens_input3, n_tokens, n_past);
         // struct ggml_tensor * logits4 = forward(&model, &kv_self, ctx0, &gf, tokens_input4, n_tokens, n_past);
-
+
         // struct ggml_tensor * e = cross_entropy_loss(ctx0, targets1, logits1);
         // struct ggml_tensor * e = square_error_loss(ctx0, targets1, logits1);
-
+
         struct ggml_tensor * e = ggml_add(ctx0,
                                           square_error_loss(ctx0, targets1, logits1),
                                           square_error_loss(ctx0, targets2, logits2));
         // struct ggml_tensor * e = ggml_add(ctx0,
         //                                   cross_entropy_loss(ctx0, targets1, logits1),
         //                                   cross_entropy_loss(ctx0, targets2, logits2));
-        // struct ggml_tensor * e = ggml_add(ctx0,
+        // struct ggml_tensor * e = ggml_add(ctx0,
         //                                   ggml_add(ctx0,
         //                                       cross_entropy_loss(ctx0, targets1, logits1),
         //                                       cross_entropy_loss(ctx0, targets2, logits2)),
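The active loss here sums squared error over both (input, target) pairs, while the cross-entropy variants stay commented out. square_error_loss itself is outside this diff, so the following is only a sketch of the obvious implementation under that assumption:

    // Presumed shape of square_error_loss: elementwise difference,
    // square, and a full reduction to a scalar.
    struct ggml_tensor * square_error_loss_sketch(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b) {
        return ggml_sum(ctx, ggml_sqr(ctx, ggml_sub(ctx, a, b)));
    }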
@@ -1260,7 +1260,7 @@ int main(int argc, char ** argv) {
         opt_params_lbfgs.lbfgs.n_iter = 16;
         // ggml_opt(ctx0, opt_params_adam, e);
         ggml_opt(ctx0, opt_params_lbfgs, e);
-        //
+        //
         ggml_build_forward_expand(&gf, e);
         ggml_graph_compute(ctx0, &gf);

@@ -1292,7 +1292,7 @@ int main(int argc, char ** argv) {

         struct ggml_tensor * tokens_input = ggml_new_tensor_1d(model.ctx, GGML_TYPE_I32, n_tokens);
         struct ggml_tensor * targets      = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_vocab, n_tokens);
-
+
         get_example_targets(137, tokens_input, targets);
         for (int i=sample_ctx; i<n_tokens; ++i) {
             ggml_set_i32_1d(tokens_input, i, n_vocab/2);
@@ -1327,14 +1327,14 @@ int main(int argc, char ** argv) {

             // int sample_at = n_tokens-1;
             int token = ggml_get_i32_1d(best_samples, sample_ctx-1);
-
+
             // print_row(probs, sample_at);
             print_token(token, n_vocab);

             lshift_examples(tokens_input, targets, 1);
             ggml_set_i32_1d(tokens_input, 0, 0);
             ggml_set_i32_1d(tokens_input, sample_ctx-1, token);
-
+
             // printf("---\n");
             // for (int i=0; i<sample_ctx-1; ++i) {
             //     print_token(ggml_get_i32_1d(tokens_input, i), model.hparams.n_vocab);
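This hunk sits in the sampling loop: the token sampled at position sample_ctx-1 is printed, the window is shifted left one slot with lshift_examples, position 0 is reset to token 0, and the new token is written back at sample_ctx-1, so generation proceeds autoregressively over a fixed-size context. lshift_examples is not part of this diff; assuming it shifts every element one position toward the front, its effect on tokens_input is equivalent to:

    // Assumed behavior: drop the oldest token, move the rest forward,
    // freeing the last slot for the next sampled token.
    for (int i = 0; i < sample_ctx - 1; ++i) {
        ggml_set_i32_1d(tokens_input, i, ggml_get_i32_1d(tokens_input, i + 1));
    }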
@@ -1350,7 +1350,7 @@ int main(int argc, char ** argv) {
         }
         printf("important (dont optimize it away, compiler!) : %d\n", important_sum);
     }
-
+
     print_matrix(model.tok_embeddings);

     printf("done\n");