
Commit 6f28a33

zkh2016 and zhangkaihuo authored
llama : MiniCPM support tied embeddings (#7664)
* support lm_head
* remove the code block

Co-authored-by: zhangkaihuo <[email protected]>
1 parent: 549279d

File tree: 2 files changed (+6, -7 lines)


gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions
@@ -645,6 +645,7 @@ class MODEL_TENSOR(IntEnum):
     ],
     MODEL_ARCH.MINICPM: [
         MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_NORM,
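For context: this one-line addition registers an output (lm_head) tensor for the MiniCPM architecture in gguf-py's per-architecture tensor lists, so a conversion can carry a dedicated output.weight when a checkpoint does not tie its embeddings. A minimal sketch of the resulting behavior, assuming the MODEL_TENSORS mapping that these MODEL_ARCH.MINICPM entries live in:

from gguf.constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS

# With this commit, MiniCPM declares a dedicated lm_head tensor; the loader
# side (llama.cpp below) treats it as optional.
assert MODEL_TENSOR.OUTPUT in MODEL_TENSORS[MODEL_ARCH.MINICPM]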

llama.cpp

Lines changed: 5 additions & 7 deletions
@@ -5124,12 +5124,10 @@ static bool llm_load_tensors(
                     // output
                     {
                         model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
-                        if (model.arch != LLM_ARCH_MINICPM){
-                            model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                            // if output is NULL, init from the input tok embed
-                            if (model.output == NULL) {
-                                model.output = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_DUPLICATED);
-                            }
+                        model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                        // if output is NULL, init from the input tok embed
+                        if (model.output == NULL) {
+                            model.output = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_DUPLICATED);
                         }
                     }
 
@@ -10212,7 +10210,7 @@ struct llm_build_context {
         cb(cur, "lmhead_scaling", -1);
 
         // lm_head
-        cur = ggml_mul_mat(ctx0, model.tok_embd, cur);
+        cur = ggml_mul_mat(ctx0, model.output, cur);
         cb(cur, "result_output", -1);
 
         ggml_build_forward_expand(gf, cur);
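Taken together, the two llama.cpp hunks remove the MiniCPM-specific branch so the architecture uses the generic fallback: model.output is loaded as TENSOR_NOT_REQUIRED, and if the GGUF file has no output.weight (tied embeddings), the token-embedding matrix is duplicated into model.output. The graph builder can then always multiply by model.output instead of hard-coding model.tok_embd. A minimal NumPy sketch of that tied-embeddings fallback (illustrative shapes and hypothetical variable names, not llama.cpp code):

import numpy as np

n_vocab, n_embd = 32000, 2304  # illustrative sizes only

tok_embd = np.random.randn(n_vocab, n_embd).astype(np.float32)  # input embedding table
hidden   = np.random.randn(n_embd).astype(np.float32)           # final hidden state

output = None                                         # no dedicated lm_head tensor in the file
lm_head = output if output is not None else tok_embd  # tied: reuse the embedding table
logits  = lm_head @ hidden                            # (n_vocab,) scores over the vocabulary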
