
Commit bb43cf7

Authored by bryanSwk (co-authored by bryan and Georgi Gerganov)
llama : add SEA-LION support (#6448)
* initial commit for sealion support
* add sealion support
* minor fix
* q/k ln and pos_embd only if required
* Apply suggestions from code review
  Co-authored-by: Georgi Gerganov <[email protected]>
* minor : clear whitespaces

---------

Co-authored-by: bryan <[email protected]>
Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 9f62c01 commit bb43cf7

File tree (5 files changed, +65 -4 lines):

* README.md
* convert-hf-to-gguf.py
* gguf-py/gguf/constants.py
* gguf-py/gguf/tensor_mapping.py
* llama.cpp


README.md (1 addition & 0 deletions)

@@ -118,6 +118,7 @@ Typically finetunes of the base models below are supported as well.
 - [x] [Mamba](https://github.com/state-spaces/mamba)
 - [x] [Xverse](https://huggingface.co/models?search=xverse)
 - [x] [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
+- [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
 
 **Multimodal models:**
 
convert-hf-to-gguf.py (14 additions & 1 deletion)

@@ -510,6 +510,16 @@ def write_tensors(self):
 class MPTModel(Model):
     model_arch = gguf.MODEL_ARCH.MPT
 
+    def set_vocab(self):
+        try:
+            self._set_vocab_gpt2()
+        except:
+            self._set_vocab_sentencepiece()
+        self.gguf_writer.add_add_bos_token(False)
+        self.gguf_writer.add_pad_token_id(3)
+        self.gguf_writer.add_eos_token_id(1)
+        self.gguf_writer.add_unk_token_id(0)
+
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
         self.gguf_writer.add_name(self.dir_model.name)
@@ -523,7 +533,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_eps(1e-5)
         if self.hparams["attn_config"]["clip_qkv"] is not None:
             self.gguf_writer.add_clamp_kqv(self.hparams["attn_config"]["clip_qkv"])
-        self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+        if self.hparams["attn_config"]["alibi"]:
+            self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+        else:
+            self.gguf_writer.add_max_alibi_bias(0.0)
 
     def write_tensors(self):
         block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers"))
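For readers unfamiliar with the gguf_writer calls used in set_vocab above, here is a minimal, hedged sketch of the corresponding gguf Python API (the gguf-py package in this tree). The output filename is a placeholder, and the sketch writes only tokenizer metadata, not tensors; it is an illustration, not part of this commit.

    # Hedged sketch: emit the tokenizer metadata that MPTModel.set_vocab sets for SEA-LION.
    import gguf

    writer = gguf.GGUFWriter("sea-lion-example.gguf", gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.MPT])
    writer.add_add_bos_token(False)   # tokenizer.ggml.add_bos_token
    writer.add_pad_token_id(3)        # tokenizer.ggml.padding_token_id
    writer.add_eos_token_id(1)        # tokenizer.ggml.eos_token_id
    writer.add_unk_token_id(0)        # tokenizer.ggml.unknown_token_id
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()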

gguf-py/gguf/constants.py (3 additions & 0 deletions)

@@ -367,6 +367,9 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.FFN_ACT,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.POS_EMBD,
     ],
     MODEL_ARCH.GPTJ: [
         MODEL_TENSOR.TOKEN_EMBD,
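To make the effect of this change concrete, a small hedged check (assuming the gguf-py package from this tree is importable) that the MPT architecture's tensor list now carries the new entries and printing their GGUF name templates:

    # Hedged sketch: inspect the MPT tensor list and name templates after this commit.
    import gguf

    new_tensors = (
        gguf.MODEL_TENSOR.ATTN_Q_NORM,
        gguf.MODEL_TENSOR.ATTN_K_NORM,
        gguf.MODEL_TENSOR.POS_EMBD,
    )
    for t in new_tensors:
        assert t in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.MPT]
        print(t.name, "->", gguf.TENSOR_NAMES[t])
    # expected templates: blk.{bid}.attn_q_norm, blk.{bid}.attn_k_norm, position_embd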

gguf-py/gguf/tensor_mapping.py (2 additions & 0 deletions)

@@ -285,11 +285,13 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
+            "transformer.blocks.{bid}.attn.q_ln",       # sea-lion
         ),
 
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
+            "transformer.blocks.{bid}.attn.k_ln",       # sea-lion
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
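As a usage illustration, a hedged sketch of how the converter resolves the new sea-lion names through TensorNameMap. The block count of 32 is a made-up placeholder; the real value comes from the checkpoint's config.json (n_layers).

    # Hedged sketch: map an HF tensor name from a sea-lion checkpoint to its GGUF name.
    import gguf

    block_count = 32  # placeholder; read n_layers from the model's config.json
    name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MPT, block_count)
    gguf_name = name_map.get_name("transformer.blocks.0.attn.q_ln.weight",
                                  try_suffixes=(".weight", ".bias"))
    print(gguf_name)  # expected: blk.0.attn_q_norm.weight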

llama.cpp (45 additions & 3 deletions)

@@ -594,6 +594,9 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
            { LLM_TENSOR_FFN_ACT,         "blk.%d.ffn.act" },
+           { LLM_TENSOR_POS_EMBD,        "position_embd" },
+           { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm"},
+           { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm"},
        },
    },
    {
@@ -4867,6 +4870,7 @@ static bool llm_load_tensors(
        case LLM_ARCH_MPT:
            {
                model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+               model.pos_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_POS_EMBD,   "weight"), {n_embd, hparams.n_ctx_train}, false);
 
                // output
                {
@@ -4905,6 +4909,12 @@ static bool llm_load_tensors(
                    layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
                    layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i),   {n_ff}, false);
 
+                   layer.attn_q_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd}, false);
+                   layer.attn_q_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q_NORM, "bias",   i), {n_embd}, false);
+
+                   layer.attn_k_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd}, false);
+                   layer.attn_k_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "bias",   i), {n_embd}, false);
+
                    // AWQ ScaleActivation layer
                    layer.ffn_act = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_ACT, "scales", i), {n_ff}, false);
                }
@@ -7721,6 +7731,7 @@ struct llm_build_context {
        GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
 
        struct ggml_tensor * cur;
+       struct ggml_tensor * pos;
        struct ggml_tensor * inpL;
 
        inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
@@ -7731,6 +7742,16 @@ struct llm_build_context {
        // positions of the tokens in the KV cache
        struct ggml_tensor * KQ_pos = build_inp_KQ_pos();
 
+       if (model.pos_embd) {
+           // inp_pos - contains the positions
+           struct ggml_tensor * inp_pos = build_inp_pos();
+           pos = ggml_get_rows(ctx0, model.pos_embd, inp_pos);
+           cb(pos, "pos_embd", -1);
+
+           inpL = ggml_add(ctx0, inpL, pos);
+           cb(inpL, "inpL", -1);
+       }
+
        for (int il = 0; il < n_layer; ++il) {
            struct ggml_tensor * attn_norm;
 
@@ -7765,11 +7786,32 @@ struct llm_build_context {
            cb(Kcur, "Kcur", il);
            cb(Vcur, "Vcur", il);
 
-           Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
+           // Q/K Layernorm
+           if (model.layers[il].attn_q_norm) {
+               Qcur = llm_build_norm(ctx0, Qcur, hparams,
+                       model.layers[il].attn_q_norm,
+                       model.layers[il].attn_q_norm_b,
+                       LLM_NORM, cb, il);
+               cb(Qcur, "Qcur", il);
 
-           cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
+               Kcur = llm_build_norm(ctx0, Kcur, hparams,
+                       model.layers[il].attn_k_norm,
+                       model.layers[il].attn_k_norm_b,
+                       LLM_NORM, cb, il);
+               cb(Kcur, "Kcur", il);
+
+               Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head,    n_tokens);
+               Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
+
+               cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
                    model.layers[il].wo, model.layers[il].bo,
-                   Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
+                   Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
+           } else {
+               Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
+               cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
+                   model.layers[il].wo, model.layers[il].bo,
+                   Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
+           }
        }
 
        if (il == n_layer - 1) {
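For intuition, here is a small hedged sketch, in numpy rather than ggml, of what the MPT graph changes above compute for a SEA-LION-style checkpoint: learned absolute position embeddings are gathered by token position and added to the input embeddings, and Q and K receive a LayerNorm over the full embedding width before the per-head reshape. All shapes and weights below are illustrative placeholders, not values from the model.

    # Hedged numpy sketch of the added graph steps (not the actual ggml implementation).
    import numpy as np

    def layer_norm(x, w, b, eps=1e-5):
        # normalize over the last (embedding) dimension, as LLM_NORM does here
        mu = x.mean(-1, keepdims=True)
        var = x.var(-1, keepdims=True)
        return (x - mu) / np.sqrt(var + eps) * w + b

    n_tokens, n_embd, n_ctx_train = 4, 8, 16  # placeholder sizes
    rng = np.random.default_rng(0)

    tok_embd = rng.standard_normal((n_tokens, n_embd))
    pos_embd = rng.standard_normal((n_ctx_train, n_embd))   # plays the role of model.pos_embd

    # mirrors: pos = ggml_get_rows(pos_embd, inp_pos); inpL = inpL + pos
    inp_pos = np.arange(n_tokens)
    inpL = tok_embd + pos_embd[inp_pos]

    # mirrors the Q/K layernorm branch: applied to full-width Q and K before the head reshape
    wq = rng.standard_normal((n_embd, n_embd))
    wk = rng.standard_normal((n_embd, n_embd))
    q = layer_norm(inpL @ wq, np.ones(n_embd), np.zeros(n_embd))
    k = layer_norm(inpL @ wk, np.ones(n_embd), np.zeros(n_embd))
    print(inpL.shape, q.shape, k.shape)  # (4, 8) (4, 8) (4, 8)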
