@@ -1005,6 +1005,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 16: type = LLM_TYPE_1B; break;
                     case 32: type = LLM_TYPE_7B; break;
                     case 40: type = LLM_TYPE_13B; break;
+                    case 64: type = LLM_TYPE_32B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
@@ -2726,6 +2727,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_OLMO2:
                 {
+                    const int64_t n_embd_head = n_embd / n_head;
+
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);

                     // output
@@ -2740,7 +2743,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0);
                         layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
                         layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd}, 0);
-                        layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd}, 0);
+                        layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_head_kv * n_embd_head}, 0);
                         layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), {n_embd}, 0);

                         layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
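Background on the ATTN_K_NORM change, sketched below (this note and the snippet are not part of the commit): under grouped-query attention the K projection produces n_head_kv heads rather than n_head, so its norm weight holds n_head_kv * n_embd_head elements, the same quantity passed as n_embd_gqa to wv above; when n_head_kv equals n_head this reduces to n_embd, which is why the old {n_embd} shape happened to work for the earlier OLMo 2 sizes. The dimensions in the sketch are placeholders, not the official OLMo 2 32B configuration.

// Minimal sketch of the shape arithmetic; placeholder dimensions, not OLMo 2 32B's real config.
#include <cassert>
#include <cstdint>

int main() {
    const int64_t n_embd      = 4096;  // hidden size (placeholder)
    const int64_t n_head      = 32;    // query heads (placeholder)
    const int64_t n_head_kv   = 8;     // KV heads under GQA (placeholder)

    const int64_t n_embd_head = n_embd / n_head;          // per-head dimension: 128
    const int64_t k_norm_size = n_head_kv * n_embd_head;  // 1024, i.e. the n_embd_gqa-sized K output

    assert(k_norm_size != n_embd);                        // GQA case: a {n_embd} K-norm would be the wrong size
    return 0;
}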