@@ -4870,7 +4870,7 @@ static bool llm_load_tensors(
4870
4870
model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // word_embeddings
4871
4871
model.type_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}); //token_type_embeddings
4872
4872
model.tok_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}); // LayerNorm
4873
- model.tok_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}); //LayerNorm bias? Not sure needed
4873
+ model.tok_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}); //LayerNorm bias
4874
4874
4875
4875
for (int i = 0; i < n_layer; ++i) {
4876
4876
ggml_context * ctx_layer = ctx_for_layer(i);
@@ -4893,8 +4893,8 @@ static bool llm_load_tensors(
4893
4893
layer.attn_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
4894
4894
layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});
4895
4895
4896
- // TODO: HANDLE ALL THE MLP
4897
- layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP , "weight", i), {n_embd, 2 * n_ff});
4896
+ layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
4897
+ layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
4898
4898
4899
4899
layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd});
4900
4900
layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd});
@@ -5851,7 +5851,7 @@ static struct ggml_tensor * llm_build_ffn(
5851
5851
llm_ffn_gate_type type_gate,
5852
5852
const llm_build_cb & cb,
5853
5853
int il) {
5854
- struct ggml_tensor * tmp = ggml_mul_mat(ctx, up, cur);
5854
+ struct ggml_tensor * tmp = up ? ggml_mul_mat(ctx, up, cur) : cur;
5855
5855
cb(tmp, "ffn_up", il);
5856
5856
5857
5857
if (up_b) {
@@ -7522,8 +7522,11 @@ struct llm_build_context {
7522
7522
7523
7523
struct ggml_tensor * cur;
7524
7524
struct ggml_tensor * inpL;
7525
+ struct ggml_tensor * inp_pos = nullptr;
7525
7526
7526
- struct ggml_tensor * inp_pos = build_inp_pos();
7527
+ if (model.arch != LLM_ARCH_JINA_BERT) {
7528
+ inp_pos = build_inp_pos();
7529
+ }
7527
7530
struct ggml_tensor * inp_mean = build_inp_mean();
7528
7531
struct ggml_tensor * inp_cls = build_inp_cls();
7529
7532
@@ -7644,13 +7647,20 @@ struct llm_build_context {
7644
7647
cb(ffn_inp, "ffn_inp", il);
7645
7648
7646
7649
// feed-forward network
7647
- if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT ) {
7650
+ if (model.arch == LLM_ARCH_BERT) {
7648
7651
cur = llm_build_ffn(ctx0, cur,
7649
7652
model.layers[il].ffn_up, model.layers[il].ffn_up_b,
7650
7653
NULL, NULL,
7651
7654
model.layers[il].ffn_down, model.layers[il].ffn_down_b,
7652
7655
NULL,
7653
7656
LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
7657
+ } else if (model.arch == LLM_ARCH_JINA_BERT) {
7658
+ cur = llm_build_ffn(ctx0, cur,
7659
+ model.layers[il].ffn_up, NULL,
7660
+ model.layers[il].ffn_gate, NULL,
7661
+ model.layers[il].ffn_down, model.layers[il].ffn_down_b,
7662
+ NULL,
7663
+ LLM_FFN_GELU, LLM_FFN_PAR, cb, il);
7654
7664
} else {
7655
7665
cur = llm_build_ffn(ctx0, cur,
7656
7666
model.layers[il].ffn_up, NULL,
0 commit comments