@@ -12873,12 +12873,12 @@ struct llm_build_context {
12873
12873
struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
12874
12874
cb(ffn_inp, "ffn_inp", il);
12875
12875
12876
- if ((uint32_t) il < hparams.n_layer_dense_lead) {
12877
- cur = llm_build_norm(ctx0, ffn_inp, hparams,
12878
- model.layers[il].ffn_norm, NULL,
12879
- LLM_NORM_RMS, cb, il);
12880
- cb(cur, "ffn_norm", il);
12876
+ cur = llm_build_norm(ctx0, ffn_inp, hparams,
12877
+ model.layers[il].ffn_norm, NULL,
12878
+ LLM_NORM_RMS, cb, il);
12879
+ cb(cur, "ffn_norm", il);
12881
12880
12881
+ if ((uint32_t) il < hparams.n_layer_dense_lead) {
12882
12882
cur = llm_build_ffn(ctx0, cur,
12883
12883
model.layers[il].ffn_up, NULL, NULL,
12884
12884
model.layers[il].ffn_gate, NULL, NULL,
@@ -12888,11 +12888,6 @@ struct llm_build_context {
12888
12888
cb(cur, "ffn_out", il);
12889
12889
} else {
12890
12890
// MoE branch
12891
- cur = llm_build_norm(ctx0, ffn_inp, hparams,
12892
- model.layers[il].ffn_norm, NULL,
12893
- LLM_NORM_RMS, cb, il);
12894
- cb(cur, "ffn_norm", il);
12895
-
12896
12891
ggml_tensor * moe_out =
12897
12892
llm_build_moe_ffn(ctx0, cur,
12898
12893
model.layers[il].ffn_gate_inp,
0 commit comments