Skip to content

Commit c104023

Browse files
authored
mtmd : Use RMS norm for InternVL 3 38B and 78B mmproj (#13459)
1 parent 9a390c4 commit c104023

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

tools/mtmd/clip.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -879,9 +879,15 @@ struct clip_graph {
879879
// add CLS token
880880
inp = ggml_concat(ctx0, inp, model.class_embedding, 1);
881881

882+
// The larger models use a different ViT, which uses RMS norm instead of layer norm
883+
// ref: https://github.com/ggml-org/llama.cpp/pull/13443#issuecomment-2869786188
884+
norm_type norm_t = (hparams.n_embd == 3200 && hparams.n_layer == 45)
885+
? NORM_TYPE_RMS // 6B ViT (Used by InternVL 2.5/3 - 26B, 38B, 78B)
886+
: NORM_TYPE_NORMAL; // 300M ViT (Used by all smaller InternVL models)
887+
882888
ggml_tensor * cur = build_vit(
883889
inp, n_pos,
884-
NORM_TYPE_NORMAL,
890+
norm_t,
885891
hparams.ffn_op,
886892
model.position_embeddings,
887893
nullptr);

0 commit comments

Comments
 (0)