Commit 489a0e8

Nomic BERT
1 parent f281d76 commit 489a0e8

4 files changed, +242 -74 lines changed

convert-hf-to-gguf.py

Lines changed: 47 additions & 0 deletions
@@ -211,6 +211,8 @@ def from_model_architecture(model_architecture):
             return MiniCPMModel
         if model_architecture == "BertModel":
             return BertModel
+        if model_architecture == "NomicBertModel":
+            return NomicBertModel
         return Model

     def _is_model_safetensors(self) -> bool:
@@ -268,6 +270,8 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
             return gguf.MODEL_ARCH.MINICPM
         if arch == "BertModel":
             return gguf.MODEL_ARCH.BERT
+        if arch == "NomicBertModel":
+            return gguf.MODEL_ARCH.NOMIC_BERT

         raise NotImplementedError(f'Architecture "{arch}" not supported!')

@@ -1637,6 +1641,7 @@ class BertModel(Model):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.block_count = self.hparams["num_hidden_layers"]
+        self.vocab_size = None

     def set_gguf_parameters(self):
         # TODO(cebtenzzre): merge with parent class
@@ -1659,6 +1664,7 @@ def set_vocab(self):
         vocab = HfVocab(path, added_tokens_path)
         tokens, scores, toktypes = zip(*vocab.all_tokens())
         assert len(tokens) == vocab.vocab_size
+        self.vocab_size = vocab.vocab_size

         # we need this to validate the size of the token_type embeddings
         # though currently we are passing all zeros to the token_type embeddings
@@ -1724,6 +1730,47 @@ def write_tensors(self):
             self.gguf_writer.add_tensor(new_name, data)


+class NomicBertModel(BertModel):
+    def __init__(self, *args, **kwargs):
+        Model.__init__(self, *args, **kwargs)
+        self.block_count = self.hparams["n_layer"]
+        assert self.hparams["activation_function"] == "swiglu"
+        assert self.hparams["causal"] is False  # True is untested
+        assert self.hparams["qkv_proj_bias"] is False
+        assert self.hparams["mlp_fc1_bias"] is False
+        assert self.hparams["mlp_fc2_bias"] is False
+        assert self.hparams["prenorm"] is False
+        assert self.hparams["rotary_emb_fraction"] == 1.0
+        assert self.hparams["rotary_emb_interleaved"] is False
+        assert self.hparams["rotary_emb_scale_base"] is None
+
+    def set_gguf_parameters(self):
+        # TODO(cebtenzzre): merge with parent class
+        self.gguf_writer.add_name(self.dir_model.name)
+        # the HF config claims n_ctx=8192, but it uses RoPE scaling
+        self.gguf_writer.add_context_length(2048)
+        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_head_count(self.hparams["n_head"])
+        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_causal_attention(self.hparams["causal"])
+        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
+        self.gguf_writer.add_pooling_layer(True)
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def get_tensors(self):
+        for name, data in super().get_tensors():
+            # Nomic Embed's token embeddings tensor is padded, but llama.cpp wants tensor sizes to match exactly.
+            if name == 'embeddings.word_embeddings.weight' and data.shape[1] != self.vocab_size:
+                rounded_vocab_size = (self.vocab_size + 7) // 8 * 8
+                print(data.shape)
+                print(rounded_vocab_size, self.hparams["n_embd"])
+                assert data.shape == (rounded_vocab_size, self.hparams["n_embd"])
+                data = data[:self.vocab_size, :]
+            yield name, data
+
+
 ###### CONVERSION LOGIC ######

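Note (not part of the commit): the padding trim in NomicBertModel.get_tensors is easy to check in isolation. The sketch below uses made-up sizes and plain NumPy; it only illustrates the rounding and slicing the converter applies to the word-embedding tensor.

import numpy as np

# Hypothetical sizes: a tokenizer vocab whose embedding rows were padded up to a
# multiple of 8, which is the layout the converter assumes for Nomic Embed.
vocab_size = 30522
n_embd = 768
rounded_vocab_size = (vocab_size + 7) // 8 * 8   # 30528, same rounding as get_tensors()

# Stand-in for 'embeddings.word_embeddings.weight' as loaded from the checkpoint.
padded = np.zeros((rounded_vocab_size, n_embd), dtype=np.float32)

# llama.cpp wants the tensor to match the vocab exactly, so the padding rows are dropped.
trimmed = padded[:vocab_size, :]
assert trimmed.shape == (vocab_size, n_embd)

The slice keeps the written GGUF tensor consistent with the vocab_size recorded by BertModel.set_vocab above.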

gguf-py/gguf/constants.py

Lines changed: 36 additions & 20 deletions
@@ -87,27 +87,28 @@ class Tokenizer:


 class MODEL_ARCH(IntEnum):
-    LLAMA     = auto()
-    FALCON    = auto()
-    BAICHUAN  = auto()
-    GPT2      = auto()
-    GPTJ      = auto()
-    GPTNEOX   = auto()
-    MPT       = auto()
-    STARCODER = auto()
-    PERSIMMON = auto()
-    REFACT    = auto()
-    BERT      = auto()
-    BLOOM     = auto()
-    STABLELM  = auto()
-    QWEN      = auto()
-    QWEN2     = auto()
-    PHI2      = auto()
-    PLAMO     = auto()
-    CODESHELL = auto()
-    ORION     = auto()
+    LLAMA      = auto()
+    FALCON     = auto()
+    BAICHUAN   = auto()
+    GPT2       = auto()
+    GPTJ       = auto()
+    GPTNEOX    = auto()
+    MPT        = auto()
+    STARCODER  = auto()
+    PERSIMMON  = auto()
+    REFACT     = auto()
+    BERT       = auto()
+    NOMIC_BERT = auto()
+    BLOOM      = auto()
+    STABLELM   = auto()
+    QWEN       = auto()
+    QWEN2      = auto()
+    PHI2       = auto()
+    PLAMO      = auto()
+    CODESHELL  = auto()
+    ORION      = auto()
     INTERNLM2 = auto()
-    MINICPM   = auto()
+    MINICPM    = auto()


 class MODEL_TENSOR(IntEnum):
@@ -153,6 +154,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.PERSIMMON: "persimmon",
     MODEL_ARCH.REFACT: "refact",
     MODEL_ARCH.BERT: "bert",
+    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
     MODEL_ARCH.BLOOM: "bloom",
     MODEL_ARCH.STABLELM: "stablelm",
     MODEL_ARCH.QWEN: "qwen",
@@ -282,6 +284,20 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
+    MODEL_ARCH.NOMIC_BERT: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.TOKEN_TYPES,
+        MODEL_TENSOR.POS_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_OUT_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.LAYER_OUT_NORM,
+    ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
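
Note (not part of the commit): a minimal sketch of how these constants are consumed, assuming the dictionaries patched above are gguf.constants.MODEL_ARCH_NAMES and gguf.constants.MODEL_TENSORS, with TENSOR_NAMES providing each tensor's GGUF base name.

from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSORS, TENSOR_NAMES

# Architecture string that ends up in the GGUF metadata.
print(MODEL_ARCH_NAMES[MODEL_ARCH.NOMIC_BERT])   # expected: "nomic-bert"

# Tensor kinds a nomic-bert model may contain, shown as their GGUF base names.
for tensor in MODEL_TENSORS[MODEL_ARCH.NOMIC_BERT]:
    print(TENSOR_NAMES[tensor])                  # e.g. "token_embd", "blk.{bid}.attn_qkv"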

gguf-py/gguf/tensor_mapping.py

Lines changed: 10 additions & 2 deletions
@@ -15,7 +15,7 @@ class TensorNameMap:
             "word_embeddings",  # bloom
             "model.embed_tokens",  # llama-hf
             "tok_embeddings",  # llama-pth
-            "embeddings.word_embeddings",  # bert
+            "embeddings.word_embeddings",  # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
             "wte",  # gpt2
             "transformer.embd.wte",  # phi2
@@ -24,13 +24,14 @@ class TensorNameMap:

         # Token type embeddings
         MODEL_TENSOR.TOKEN_TYPES: (
-            "embeddings.token_type_embeddings",  # bert
+            "embeddings.token_type_embeddings",  # bert nomic-bert
         ),

         # Normalization of token embeddings
         MODEL_TENSOR.TOKEN_EMBD_NORM: (
             "word_embeddings_layernorm",  # bloom
             "embeddings.LayerNorm",  # bert
+            "emb_ln",  # nomic-bert
         ),

         # Position embeddings
@@ -103,6 +104,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.query_key_value",  # persimmon
             "h.{bid}.attn.c_attn",  # gpt2
             "transformer.h.{bid}.mixer.Wqkv",  # phi2
+            "encoder.layers.{bid}.attn.Wqkv",  # nomic-bert
         ),

         # Attention query
@@ -152,11 +154,13 @@ class TensorNameMap:
             "transformer.h.{bid}.mixer.out_proj",  # phi2
             "model.layers.layers.{bid}.self_attn.o_proj",  # plamo
             "model.layers.{bid}.attention.wo",  # internlm2
+            "encoder.layers.{bid}.attn.out_proj",  # nomic-bert
         ),

         # Attention output norm
         MODEL_TENSOR.ATTN_OUT_NORM: (
             "encoder.layer.{bid}.attention.output.LayerNorm",  # bert
+            "encoder.layers.{bid}.norm1",  # nomic-bert
         ),

         # Rotary embeddings
@@ -205,6 +209,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.fc1",  # phi2
             "model.layers.layers.{bid}.mlp.up_proj",  # plamo
             "model.layers.{bid}.feed_forward.w3",  # internlm2
+            "encoder.layers.{bid}.mlp.fc11",  # nomic-bert
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -224,6 +229,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.w2",  # qwen
             "model.layers.layers.{bid}.mlp.gate_proj",  # plamo
             "model.layers.{bid}.feed_forward.w1",  # internlm2
+            "encoder.layers.{bid}.mlp.fc12",  # nomic-bert
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -249,6 +255,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.fc2",  # phi2
             "model.layers.layers.{bid}.mlp.down_proj",  # plamo
             "model.layers.{bid}.feed_forward.w2",  # internlm2
+            "encoder.layers.{bid}.mlp.fc2",  # nomic-bert
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -272,6 +279,7 @@ class TensorNameMap:

         MODEL_TENSOR.LAYER_OUT_NORM: (
             "encoder.layer.{bid}.output.LayerNorm",  # bert
+            "encoder.layers.{bid}.norm2",  # nomic-bert
         )
     }

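Note (not part of the commit): these entries are what let the converter rename Nomic BERT's Hugging Face tensor names to the generic GGUF names. A rough sketch of the lookup, mirroring how convert-hf-to-gguf.py calls the map; the block count of 12 and the expected output strings are illustrative assumptions.

import gguf
from gguf.constants import MODEL_ARCH

tensor_map = gguf.get_tensor_name_map(MODEL_ARCH.NOMIC_BERT, 12)

# Hugging Face names resolve to GGUF names; the .weight/.bias suffix is carried through.
print(tensor_map.get_name("encoder.layers.0.attn.Wqkv.weight",
                          try_suffixes=(".weight", ".bias")))   # expected: "blk.0.attn_qkv.weight"
print(tensor_map.get_name("emb_ln.weight",
                          try_suffixes=(".weight", ".bias")))   # expected: "token_embd_norm.weight"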
