
Commit c1c0f4d

Author: Joan Martinez (committed)
fix: fix convert formatting
1 parent db7e8ce commit c1c0f4d

File tree

1 file changed: +42, -52 lines changed


convert-hf-to-gguf.py

Lines changed: 42 additions & 52 deletions
@@ -77,13 +77,11 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
         for part_name in self.part_names:
             print(f"gguf: loading model part '{part_name}'")
             ctx: ContextManager[Any]
-
             if self.is_safetensors:
                 from safetensors import safe_open
                 ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu"))
             else:
-                ctx = contextlib.nullcontext(
-                    torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
+                ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
 
             with ctx as model_part:
                 for name in model_part.keys():
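
The collapsed line above leans on contextlib.nullcontext so that both branches hand the caller something usable in a `with` block. A minimal standalone sketch of that pattern (the open_part helper and the file name are hypothetical, and safetensors/torch are assumed to be installed):

import contextlib
from typing import Any, ContextManager, cast


def open_part(path: str, is_safetensors: bool) -> ContextManager[Any]:
    # Either branch returns something usable in a `with` block.
    if is_safetensors:
        from safetensors import safe_open
        return cast(ContextManager[Any], safe_open(path, framework="pt", device="cpu"))
    import torch
    # torch.load returns a plain dict, so nullcontext wraps it to keep the `with` shape.
    return contextlib.nullcontext(torch.load(path, map_location="cpu", mmap=True, weights_only=True))


# with open_part("model-00001-of-00002.safetensors", True) as part:
#     for name in part.keys():
#         ...
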
@@ -120,8 +118,7 @@ def set_gguf_parameters(self):
         if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
             self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
             print(f"gguf: rms norm epsilon = {f_rms_eps}")
-        if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"],
-                                           optional=True)) is not None:
+        if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
             self.gguf_writer.add_layer_norm_eps(f_norm_eps)
             print(f"gguf: layer norm epsilon = {f_norm_eps}")
         if (n_experts := self.hparams.get("num_local_experts")) is not None:
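
find_hparam probes several alternative config keys for one value. A rough standalone sketch of that lookup, using a plain dict in place of the converter's Model class (this find_hparam is a simplified stand-in, not the real method):

from typing import Any


def find_hparam(hparams: dict[str, Any], keys: list[str], optional: bool = False) -> Any:
    # Return the value of the first key present, mirroring the multi-name lookup above.
    for key in keys:
        if key in hparams:
            return hparams[key]
    if optional:
        return None
    raise KeyError(f"none of {keys} found in hparams")


hparams = {"layer_norm_epsilon": 1e-5}
if (f_norm_eps := find_hparam(hparams, ["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
    print(f"gguf: layer norm epsilon = {f_norm_eps}")
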
@@ -209,7 +206,6 @@ def func(modelcls: type[Model]):
             for name in names:
                 cls._model_classes[name] = modelcls
             return modelcls
-
         return func
 
     @classmethod
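
The func closure above is the inner half of Model.register, which maps Hugging Face architecture names to converter classes. A self-contained sketch of that registry-decorator idea, with simplified names (Registry, GemmaConverter, from_name) that are not part of the script:

class Registry:
    _classes: dict[str, type] = {}

    @classmethod
    def register(cls, *names: str):
        assert names

        def func(modelcls: type):
            # Register the decorated class under every given architecture name.
            for name in names:
                cls._classes[name] = modelcls
            return modelcls
        return func

    @classmethod
    def from_name(cls, name: str) -> type:
        return cls._classes[name]


@Registry.register("GemmaForCausalLM")
class GemmaConverter:
    pass


print(Registry.from_name("GemmaForCausalLM"))  # <class '__main__.GemmaConverter'>
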
@@ -294,7 +290,7 @@ def _set_vocab_qwen(self):
 
         # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
         added_vocab = tokenizer.special_tokens
-        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
+        reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
 
         for i in range(vocab_size):
             if i not in reverse_vocab:
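
The changed line merges the base and added vocabularies with the dict-union operator and inverts token-to-id into id-to-token. A toy illustration with made-up tokens:

vocab = {"hello": 0, "world": 1}
added_vocab = {"<|endoftext|>": 2}

# Merge both maps, then invert token -> id into id -> token.
reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
print(reverse_vocab)  # {0: 'hello', 1: 'world', 2: '<|endoftext|>'}
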
@@ -779,8 +775,8 @@ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | Non
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
     def _reverse_hf_permute_part(
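
The reshape/swapaxes chain whose indentation changes here undoes the head-wise permutation that Hugging Face checkpoints apply to rotary Q/K weights. A toy shape check of the same chain, with made-up dimensions:

import torch

n_head = 4
hidden = 16  # toy size: rows = n_head * head_dim
weights = torch.arange(hidden * hidden, dtype=torch.float32).reshape(hidden, hidden)

permuted = (
    weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
    .swapaxes(1, 2)
    .reshape(weights.shape)
)
print(permuted.shape)  # torch.Size([16, 16]): same shape, rows regrouped within each head
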
@@ -931,8 +927,8 @@ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | Non
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
 
@@ -1209,8 +1205,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
         rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"])
-        self.gguf_writer.add_rope_dimension_count(
-            int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
+        self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
@@ -1304,7 +1299,7 @@ class LlamaModel(Model):
 
     def set_vocab(self):
         try:
-            self._set_vocab_sentencepiece()
+            self. _set_vocab_sentencepiece()
         except FileNotFoundError:
             try:
                 self._set_vocab_llama_hf()
@@ -1653,8 +1648,8 @@ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | Non
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
     def write_tensors(self):
@@ -1914,8 +1909,7 @@ def write_tensors(self):
 
         for name, data_torch in self.get_tensors():
             # we don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq",
-                              ".attn.bias", ".attn.masked_bias")):
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq", ".attn.bias", ".attn.masked_bias")):
                 continue
 
             if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_proj.weight")):
@@ -2300,8 +2294,7 @@ def write_tensors(self):
                 bid = re.findall(qkv_pattern, name)[0]
                 qkv = data_torch
                 qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
-                q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[...,
-                                                                                            q_per_kv + 1: q_per_kv + 2, :]
+                q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :]
                 # The model weights of q and k equire additional reshape.
                 q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads)
                 k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads)
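
The re-joined line slices a fused wqkv tensor into per-group query, key and value blocks after an einops rearrange. A toy-sized version of the same split (the dimensions are made up, and einops is assumed to be installed):

import torch
from einops import rearrange

num_groups, q_per_kv, head_dim, hidden = 2, 2, 4, 8
wqkv = torch.randn(num_groups * (q_per_kv + 2) * head_dim, hidden)  # q/k/v rows stacked per group

qkv = rearrange(wqkv.T, "o (g n i) -> o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
q = qkv[..., :q_per_kv, :]              # (hidden, groups, q_per_kv, head_dim)
k = qkv[..., q_per_kv:q_per_kv + 1, :]  # one key head per group
v = qkv[..., q_per_kv + 1:q_per_kv + 2, :]
print(q.shape, k.shape, v.shape)
# torch.Size([8, 2, 2, 4]) torch.Size([8, 2, 1, 4]) torch.Size([8, 2, 1, 4])
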
@@ -2384,7 +2377,6 @@ def write_tensors(self):
 
             # map tensor names
             new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
-
             if new_name is None:
                 print(f"Can not map tensor {name!r}")
                 sys.exit()
@@ -2441,31 +2433,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
 
 
-@Model.register("JinaBertModel")
-class JinaBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.JINA_BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.intermediate_size = self.hparams["intermediate_size"]
-
-    def get_tensors(self):
-        for name, data in super().get_tensors():
-            if 'gated_layers' in name:
-                d1 = data[:self.intermediate_size, :]
-                name1 = name.replace('gated_layers', 'gated_layers_w')
-                d2 = data[self.intermediate_size:, :]
-                name2 = name.replace('gated_layers', 'gated_layers_v')
-                yield name1, d1
-                yield name2, d2
-                continue
-
-            yield name, data
-
-
-JinaBertForMaskedML = JinaBertModel
-
-
 @Model.register("GemmaForCausalLM")
 class GemmaModel(Model):
     model_arch = gguf.MODEL_ARCH.GEMMA
@@ -2493,8 +2460,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
-        self.gguf_writer.add_head_count_kv(
-            self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_key_length(hparams["head_dim"])
         self.gguf_writer.add_value_length(hparams["head_dim"])
@@ -2604,10 +2570,10 @@ def set_gguf_parameters(self):
         assert d_inner == 2 * d_model
 
         self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_context_length(2 ** 20)  # arbitrary value; for those who use the default
+        self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
-        self.gguf_writer.add_feed_forward_length(0)  # unused, but seemingly required when loading
-        self.gguf_writer.add_head_count(0)  # unused, but seemingly required when loading
+        self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
+        self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_ssm_conv_kernel(d_conv)
         self.gguf_writer.add_ssm_inner_size(d_inner)
@@ -2622,7 +2588,7 @@ def write_tensors(self):
 
         tok_embd = None
         tok_embd_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.TOKEN_EMBD] + ".weight"
-        output_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT] + ".weight"
+        output_name   = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT]   + ".weight"
 
         for name, data_torch in self.get_tensors():
             old_dtype = data_torch.dtype
@@ -2748,6 +2714,29 @@ def write_tensors(self):
 
             self.gguf_writer.add_tensor(new_name, data)
 
+@Model.register("JinaBertModel")
+class JinaBertModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.JINA_BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.intermediate_size = self.hparams["intermediate_size"]
+
+    def get_tensors(self):
+        for name, data in super().get_tensors():
+            if 'gated_layers' in name:
+                d1 = data[:self.intermediate_size, :]
+                name1 = name.replace('gated_layers', 'gated_layers_w')
+                d2 = data[self.intermediate_size:, :]
+                name2 = name.replace('gated_layers', 'gated_layers_v')
+                yield name1, d1
+                yield name2, d2
+                continue
+
+            yield name, data
+
+
+JinaBertForMaskedML = JinaBertModel
 
 ###### CONVERSION LOGIC ######
 
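
The relocated JinaBertModel splits each gated_layers tensor row-wise into gated_layers_w and gated_layers_v halves, since the two projections of its gated feed-forward are stored concatenated in one tensor. A toy illustration with made-up sizes:

import torch

intermediate_size = 3
gated_layers = torch.randn(2 * intermediate_size, 5)  # both GLU halves stacked row-wise

d1 = gated_layers[:intermediate_size, :]   # -> "...gated_layers_w"
d2 = gated_layers[intermediate_size:, :]   # -> "...gated_layers_v"
print(d1.shape, d2.shape)  # torch.Size([3, 5]) torch.Size([3, 5])
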
@@ -2816,6 +2805,7 @@ def main() -> None:
     print(f"Loading model: {dir_model.name}")
 
     hparams = Model.load_hparams(dir_model)
+
    with torch.inference_mode():
        model_class = Model.from_model_architecture(hparams["architectures"][0])
        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file)
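
The added blank line sits in main(), which loads the model's hyperparameters, dispatches on hparams["architectures"][0] and runs the conversion under torch.inference_mode(). A rough standalone sketch of that flow; load_hparams, convert and model_classes below are simplified stand-ins, not the script's real helpers:

import json
from pathlib import Path

import torch


def load_hparams(dir_model: Path) -> dict:
    # Hugging Face checkpoints keep their hyperparameters in config.json next to the weights.
    with open(dir_model / "config.json", "r", encoding="utf-8") as f:
        return json.load(f)


def convert(dir_model: Path, model_classes: dict[str, type]) -> None:
    hparams = load_hparams(dir_model)
    arch = hparams["architectures"][0]     # e.g. "LlamaForCausalLM"
    with torch.inference_mode():           # no autograd bookkeeping while shuffling tensors
        model_class = model_classes[arch]  # dispatch on the architecture name
        print(f"Converting {dir_model.name} with {model_class.__name__}")
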

0 commit comments
