@@ -2695,8 +2695,8 @@ static void llm_load_tensors(
             } break;
         case LLM_ARCH_STARCODER:
             {
-                model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
-                model.pos_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_ctx_train}, GGML_BACKEND_CPU);
+                model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
+                model.pos_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_ctx_train}, GGML_BACKEND_CPU);

                 // output
                 {
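Note on the hunk above: StarCoder uses learned absolute position embeddings (GPT-2 style), which is why a `pos_embeddings` table sized by `hparams.n_ctx_train` is loaded alongside the token embeddings. A minimal sketch of how the two tables are typically combined at the top of the graph, assuming `inp_tokens` and `inp_positions` are 1-D I32 tensors of token ids and positions (those names are not taken from this diff):

    // Look up one row per token in each table, then sum them to form the
    // initial hidden state (GPT-2 style absolute positions).
    struct ggml_tensor * token_embd = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
    struct ggml_tensor * pos_embd   = ggml_get_rows(ctx0, model.pos_embeddings, inp_positions);
    struct ggml_tensor * inpL       = ggml_add(ctx0, token_embd, pos_embd);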
@@ -2747,19 +2747,19 @@ static void llm_load_tensors(
                     layer.attn_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_NORM, "bias", i), {n_embd}, backend);

                     layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, backend_split);
-                    layer.bqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, backend_split);
+                    layer.bqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, backend);

                     layer.wo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);
-                    layer.bo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, backend_split);
+                    layer.bo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, backend);

                     layer.ffn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, backend);
                     layer.ffn_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "bias", i), {n_embd}, backend);

                     layer.w2 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, backend_split);
-                    layer.b2 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, backend_split);
+                    layer.b2 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, backend);

                     layer.w3 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, backend_split);
-                    layer.b3 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, backend_split);
+                    layer.b3 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, backend);

                     if (backend == GGML_BACKEND_GPU) {
                         vram_weights +=
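The substantive change in this hunk is the last argument of every bias tensor: `backend_split` becomes `backend`. With multi-GPU offload, ggml row-splits only the 2-D weight matrices across devices; a 1-D bias vector has no row dimension to split and must live whole on a single backend, so placing it on `backend_split` was incorrect. A sketch of the placement rule being enforced, using a hypothetical helper that is not part of the diff:

    // Hypothetical helper illustrating the rule this hunk enforces:
    // 2-D weights may be row-split across GPUs, 1-D biases may not.
    static enum ggml_backend pick_backend(int n_dims, enum ggml_backend backend, enum ggml_backend backend_split) {
        return n_dims == 2 ? backend_split : backend;
    }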
@@ -4718,7 +4718,7 @@ static struct ggml_cgraph * llm_build_starcoder(
         // Self Attention
         cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wqkv, cur), model.layers[il].bqkv);

-        struct ggml_tensor * tmpq = ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*n_embd);
+        struct ggml_tensor * tmpq = ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*n_embd);

         struct ggml_tensor * tmpk = ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], sizeof(float)*n_embd);
         struct ggml_tensor * tmpv = ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], sizeof(float)*(n_embd + n_embd_gqa));

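For context on the views above: the fused QKV projection produces one row of `n_embd + 2*n_embd_gqa` floats per token, and `ggml_view_2d` carves Q, K and V out of each row by byte offset while reusing the row stride `cur->nb[1]`. The offsets used in this hunk, assuming F32 activations:

    // Per-token row layout of `cur` after the fused QKV matmul:
    //   [0, n_embd)                                  -> Q
    //   [n_embd, n_embd + n_embd_gqa)                -> K
    //   [n_embd + n_embd_gqa, n_embd + 2*n_embd_gqa) -> V
    const size_t offs_q = 0*sizeof(float)*n_embd;               // 0 bytes
    const size_t offs_k = sizeof(float)* n_embd;
    const size_t offs_v = sizeof(float)*(n_embd + n_embd_gqa);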