@@ -51,9 +51,9 @@ struct my_llama_layer {
struct ggml_tensor * ffn_norm;

// ff
- struct ggml_tensor * w1;
- struct ggml_tensor * w2;
- struct ggml_tensor * w3;
+ struct ggml_tensor * ffn_gate; // w1
+ struct ggml_tensor * ffn_down; // w2
+ struct ggml_tensor * ffn_up; // w3
};

struct my_llama_model {
@@ -141,9 +141,9 @@ static void set_param_model(struct my_llama_model * model) {
ggml_set_param (ctx, layer.wv );
ggml_set_param (ctx, layer.wo );
ggml_set_param (ctx, layer.ffn_norm );
- ggml_set_param (ctx, layer.w1 );
- ggml_set_param (ctx, layer.w2 );
- ggml_set_param (ctx, layer.w3 );
+ ggml_set_param (ctx, layer.ffn_gate );
+ ggml_set_param (ctx, layer.ffn_down );
+ ggml_set_param (ctx, layer.ffn_up );
}
}

@@ -159,9 +159,9 @@ static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * mode
ggml_allocr_alloc (alloc, layer.wv );
ggml_allocr_alloc (alloc, layer.wo );
ggml_allocr_alloc (alloc, layer.ffn_norm );
- ggml_allocr_alloc (alloc, layer.w1 );
- ggml_allocr_alloc (alloc, layer.w2 );
- ggml_allocr_alloc (alloc, layer.w3 );
+ ggml_allocr_alloc (alloc, layer.ffn_gate );
+ ggml_allocr_alloc (alloc, layer.ffn_down );
+ ggml_allocr_alloc (alloc, layer.ffn_up );
}
ggml_allocr_alloc (alloc, model->tok_embeddings ->grad );
ggml_allocr_alloc (alloc, model->norm ->grad );
@@ -174,9 +174,9 @@ static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * mode
ggml_allocr_alloc (alloc, layer.wv ->grad );
ggml_allocr_alloc (alloc, layer.wo ->grad );
ggml_allocr_alloc (alloc, layer.ffn_norm ->grad );
- ggml_allocr_alloc (alloc, layer.w1 ->grad );
- ggml_allocr_alloc (alloc, layer.w2 ->grad );
- ggml_allocr_alloc (alloc, layer.w3 ->grad );
+ ggml_allocr_alloc (alloc, layer.ffn_gate ->grad );
+ ggml_allocr_alloc (alloc, layer.ffn_down ->grad );
+ ggml_allocr_alloc (alloc, layer.ffn_up ->grad );
}
}

@@ -232,9 +232,9 @@ static void init_model(struct my_llama_model * model) {

layer.ffn_norm = ggml_new_tensor_1d (ctx, GGML_TYPE_F32, n_embd);

- layer.w1 = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, n_embd, n_ff);
- layer.w2 = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, n_ff, n_embd);
- layer.w3 = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, n_embd, n_ff);
+ layer.ffn_gate = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, n_embd, n_ff);
+ layer.ffn_down = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, n_ff, n_embd);
+ layer.ffn_up = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, n_embd, n_ff);

ggml_set_name (layer.attention_norm , tni (LLM_TENSOR_ATTN_NORM, i));

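Side note, not part of the patch: the 2D shapes above follow ggml's convention that ne0 is the length of a row, so ffn_gate and ffn_up map n_embd to n_ff while ffn_down maps n_ff back to n_embd when fed to ggml_mul_mat. A minimal sketch of that shape relationship, assuming a plain CPU ggml build; the sizes are made up for illustration:

```c
#include "ggml.h"
#include <assert.h>

int main(void) {
    // illustrative sizes only, not taken from the patch
    const int n_embd = 8;
    const int n_ff   = 32;

    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // same shapes as in init_model() above
    struct ggml_tensor * ffn_gate = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
    struct ggml_tensor * ffn_down = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff,   n_embd);
    struct ggml_tensor * ffn_up   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);

    // one column of n_embd activations
    struct ggml_tensor * x   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, 1);
    struct ggml_tensor * cur = ggml_mul_mat(ctx, ffn_up, x);   // -> [n_ff, 1]
    assert(cur->ne[0] == n_ff);
    cur = ggml_mul_mat(ctx, ffn_down, cur);                    // -> [n_embd, 1]
    assert(cur->ne[0] == n_embd);
    (void)ffn_gate;

    ggml_free(ctx);
    return 0;
}
```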
@@ -245,9 +245,9 @@ static void init_model(struct my_llama_model * model) {

ggml_set_name (layer.ffn_norm , tni (LLM_TENSOR_FFN_NORM, i));

- ggml_set_name (layer.w1 , tni (LLM_TENSOR_FFN_GATE, i));
- ggml_set_name (layer.w2 , tni (LLM_TENSOR_FFN_DOWN, i));
- ggml_set_name (layer.w3 , tni (LLM_TENSOR_FFN_UP, i));
+ ggml_set_name (layer.ffn_gate , tni (LLM_TENSOR_FFN_GATE, i));
+ ggml_set_name (layer.ffn_down , tni (LLM_TENSOR_FFN_DOWN, i));
+ ggml_set_name (layer.ffn_up , tni (LLM_TENSOR_FFN_UP, i));
}

set_param_model (model);
@@ -288,9 +288,9 @@ static void randomize_model(struct my_llama_model * model, int seed, float mean,

randomize_tensor_normal (layer.ffn_norm , rnd);

- randomize_tensor_normal (layer.w1 , rnd);
- randomize_tensor_normal (layer.w2 , rnd);
- randomize_tensor_normal (layer.w3 , rnd);
+ randomize_tensor_normal (layer.ffn_gate , rnd);
+ randomize_tensor_normal (layer.ffn_down , rnd);
+ randomize_tensor_normal (layer.ffn_up , rnd);
}

free_random_normal_distribution (rnd);
@@ -405,11 +405,11 @@ static struct ggml_tensor * llama_build_train_graphs(
struct ggml_tensor * t22 = ggml_rms_norm (ctx, t21, f_norm_rms_eps); set_name (t22, "t22"); assert_shape_2d (t22, n_embd, N*n_batch);
struct ggml_tensor * t23 = ggml_repeat (ctx, layer.ffn_norm , t22); set_name (t23, "t23"); assert_shape_2d (t23, n_embd, N*n_batch);
struct ggml_tensor * t24 = ggml_mul (ctx, t23, t22); set_name (t24, "t24"); assert_shape_2d (t24, n_embd, N*n_batch);
- struct ggml_tensor * t25 = ggml_mul_mat (ctx, layer.w3 , t24); set_name (t25, "t25"); assert_shape_2d (t25, n_ff, N*n_batch);
- struct ggml_tensor * t26 = ggml_mul_mat (ctx, layer.w1 , t24); set_name (t26, "t26"); assert_shape_2d (t26, n_ff, N*n_batch);
+ struct ggml_tensor * t25 = ggml_mul_mat (ctx, layer.ffn_up , t24); set_name (t25, "t25"); assert_shape_2d (t25, n_ff, N*n_batch);
+ struct ggml_tensor * t26 = ggml_mul_mat (ctx, layer.ffn_gate , t24); set_name (t26, "t26"); assert_shape_2d (t26, n_ff, N*n_batch);
struct ggml_tensor * t27 = ggml_silu (ctx, t26); set_name (t27, "t27"); assert_shape_2d (t27, n_ff, N*n_batch);
struct ggml_tensor * t28 = ggml_mul (ctx, t27, t25); set_name (t28, "t28"); assert_shape_2d (t28, n_ff, N*n_batch);
- struct ggml_tensor * t29 = ggml_mul_mat (ctx, layer.w2 , t28); set_name (t29, "t29"); assert_shape_2d (t29, n_embd, N*n_batch);
+ struct ggml_tensor * t29 = ggml_mul_mat (ctx, layer.ffn_down , t28); set_name (t29, "t29"); assert_shape_2d (t29, n_embd, N*n_batch);
struct ggml_tensor * t30 = ggml_add (ctx, t29, t21); set_name (t30, "t30"); assert_shape_2d (t30, n_embd, N*n_batch);
cur = t30;
checkpoints.push_back (cur);
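The rewired nodes t25-t29 above are the standard LLaMA SwiGLU feed-forward, now spelled with the gate/down/up names: ffn_down( silu(ffn_gate(x)) * ffn_up(x) ). A hedged sketch of just that block as a free-standing helper, assuming the caller already owns the ggml_context and the three weight tensors (the name build_ffn_silu is made up here, it is not part of the patch):

```c
#include "ggml.h"

// SwiGLU feed-forward as built in the training graph above:
//   out = ffn_down * ( silu(ffn_gate * cur) .* (ffn_up * cur) )
// cur is [n_embd, n_tokens]; the result has the same shape.
struct ggml_tensor * build_ffn_silu(
        struct ggml_context * ctx,
        struct ggml_tensor  * ffn_gate,   // [n_embd, n_ff]
        struct ggml_tensor  * ffn_down,   // [n_ff, n_embd]
        struct ggml_tensor  * ffn_up,     // [n_embd, n_ff]
        struct ggml_tensor  * cur) {
    struct ggml_tensor * up   = ggml_mul_mat(ctx, ffn_up,   cur); // t25: [n_ff, n_tokens]
    struct ggml_tensor * gate = ggml_mul_mat(ctx, ffn_gate, cur); // t26: [n_ff, n_tokens]
    gate = ggml_silu(ctx, gate);                                  // t27: SiLU activation on the gate
    struct ggml_tensor * out  = ggml_mul(ctx, gate, up);          // t28: element-wise gating
    return ggml_mul_mat(ctx, ffn_down, out);                      // t29: project back to [n_embd, n_tokens]
}
```

The w1/w3/w2 tensors map onto the gate/up/down roles one-to-one, which is why the rename is purely mechanical.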
@@ -560,9 +560,9 @@ static void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_contex
copy_tensor_by_name (layer.wv , f_ggml_ctx, tni (LLM_TENSOR_ATTN_V, i));
copy_tensor_by_name (layer.wo , f_ggml_ctx, tni (LLM_TENSOR_ATTN_OUT, i));
copy_tensor_by_name (layer.ffn_norm , f_ggml_ctx, tni (LLM_TENSOR_FFN_NORM, i));
- copy_tensor_by_name (layer.w1 , f_ggml_ctx, tni (LLM_TENSOR_FFN_GATE, i));
- copy_tensor_by_name (layer.w2 , f_ggml_ctx, tni (LLM_TENSOR_FFN_DOWN, i));
- copy_tensor_by_name (layer.w3 , f_ggml_ctx, tni (LLM_TENSOR_FFN_UP, i));
+ copy_tensor_by_name (layer.ffn_gate , f_ggml_ctx, tni (LLM_TENSOR_FFN_GATE, i));
+ copy_tensor_by_name (layer.ffn_down , f_ggml_ctx, tni (LLM_TENSOR_FFN_DOWN, i));
+ copy_tensor_by_name (layer.ffn_up , f_ggml_ctx, tni (LLM_TENSOR_FFN_UP, i));
}
}

@@ -703,9 +703,9 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
gguf_add_tensor (fctx, layer.wv );
gguf_add_tensor (fctx, layer.wo );
gguf_add_tensor (fctx, layer.ffn_norm );
- gguf_add_tensor (fctx, layer.w1 );
- gguf_add_tensor (fctx, layer.w2 );
- gguf_add_tensor (fctx, layer.w3 );
+ gguf_add_tensor (fctx, layer.ffn_gate );
+ gguf_add_tensor (fctx, layer.ffn_down );
+ gguf_add_tensor (fctx, layer.ffn_up );
}
}

@@ -954,9 +954,9 @@ static int64_t get_parameter_count(struct my_llama_model* model) {
nx += ggml_nelements (layer.wv );
nx += ggml_nelements (layer.wo );
nx += ggml_nelements (layer.ffn_norm );
- nx += ggml_nelements (layer.w1 );
- nx += ggml_nelements (layer.w2 );
- nx += ggml_nelements (layer.w3 );
+ nx += ggml_nelements (layer.ffn_gate );
+ nx += ggml_nelements (layer.ffn_down );
+ nx += ggml_nelements (layer.ffn_up );
}
return nx;
}