@@ -6176,9 +6176,9 @@ static bool llm_load_tensors(
                     layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
 
                     // optional MLP bias
-                    layer.ffn_gate_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff},   llama_model_loader::TENSOR_NOT_REQUIRED);
-                    layer.ffn_down_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                    layer.ffn_up_b   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff},   llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.ffn_gate_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff},   llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.ffn_up_b   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff},   llama_model_loader::TENSOR_NOT_REQUIRED);
                 } else {
                     layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
@@ -6502,7 +6502,7 @@ static bool llm_load_tensors(
                 layer.bv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa});
 
                 layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}); //output_dens
-                layer.bo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}); //output_dens
+                layer.bo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}); //output_dens
 
                 layer.attn_out_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                 layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});