
Commit a946b40

address comments

1 parent 0d3ce09

File tree

1 file changed

convert_hf_to_gguf.py

Lines changed: 28 additions & 29 deletions
@@ -1514,35 +1514,6 @@ def set_gguf_parameters(self):
         if self.hparams.get("vocab_size", 32000) == 49152:
             self.gguf_writer.add_add_bos_token(False)
 
-        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
-            if rope_scaling.get("rope_type", '').lower() == "llama3":
-                base = hparams.get("rope_theta", 10000.0)
-                dim = int((hparams["hidden_size"] // hparams["num_attention_heads"]) * hparams.get("partial_rotary_embeddings", 1.0))
-                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
-
-                factor = rope_scaling.get("factor", 8.0)
-                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
-                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
-                old_context_len = hparams.get("original_max_position_embeddings", 8192)
-
-                low_freq_wavelen = old_context_len / low_freq_factor
-                high_freq_wavelen = old_context_len / high_freq_factor
-
-                rope_factors = []
-                for freq in freqs:
-                    wavelen = 2 * math.pi / freq
-                    if wavelen < high_freq_wavelen:
-                        rope_factors.append(1)
-                    elif wavelen > low_freq_wavelen:
-                        rope_factors.append(factor)
-                    else:
-                        assert low_freq_wavelen != high_freq_wavelen
-                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
-                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
-
-                self.gguf_writer.add_rope_scaling_attn_factors(1.0)
-                self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
-
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
         if n_head_kv is not None and n_head != n_head_kv:
@@ -1599,6 +1570,34 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         return [(self.map_tensor_name(name), data_torch)]
 
     def prepare_tensors(self):
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
+                base = self.hparams.get("rope_theta", 10000.0)
+                dim = int((self.hparams["hidden_size"] // self.hparams["num_attention_heads"]) * self.hparams.get("partial_rotary_embeddings", 1.0))
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 8.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        assert low_freq_wavelen != high_freq_wavelen
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FREQS] + ".weight", np.array(rope_factors, dtype=np.float32))
+
         super().prepare_tensors()
 
         if self._experts is not None:
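For context, the relocated block computes per-dimension "llama3" (Llama 3.1-style) rope-scaling factors. Besides moving the code from set_gguf_parameters into prepare_tensors, the commit switches the bare hparams lookups to self.hparams, drops the add_rope_scaling_attn_factors(1.0) call, and writes the tensor under gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FREQS] + ".weight" rather than through format_tensor_name. The following is a minimal standalone sketch of the same factor computation, using numpy instead of torch; the hyperparameter values (hidden_size 4096, 32 attention heads, rope_theta 500000) are Llama-3-8B-like assumptions chosen for illustration, not values taken from this commit.

import math
import numpy as np

# Illustrative hyperparameters (assumed; not from the commit above).
hparams = {"hidden_size": 4096, "num_attention_heads": 32, "rope_theta": 500000.0}
rope_scaling = {
    "factor": 8.0,
    "low_freq_factor": 1.0,
    "high_freq_factor": 4.0,
    "original_max_position_embeddings": 8192,
}

base = hparams["rope_theta"]
dim = hparams["hidden_size"] // hparams["num_attention_heads"]  # head dim = 128
# One rotary frequency per pair of channels.
freqs = 1.0 / (base ** (np.arange(0, dim, 2, dtype=np.float32) / dim))

factor = rope_scaling["factor"]
low_freq_factor = rope_scaling["low_freq_factor"]
high_freq_factor = rope_scaling["high_freq_factor"]
old_context_len = rope_scaling["original_max_position_embeddings"]

low_freq_wavelen = old_context_len / low_freq_factor    # 8192.0
high_freq_wavelen = old_context_len / high_freq_factor  # 2048.0

rope_factors = []
for freq in freqs:
    wavelen = 2 * math.pi / freq
    if wavelen < high_freq_wavelen:
        rope_factors.append(1.0)        # fast dims: leave unscaled
    elif wavelen > low_freq_wavelen:
        rope_factors.append(factor)     # slow dims: apply the full factor
    else:
        # Smoothly interpolate between the two regimes.
        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
        rope_factors.append(1 / ((1 - smooth) / factor + smooth))

print(len(rope_factors))                  # 64 == dim // 2
print(rope_factors[0], rope_factors[-1])  # 1.0 for the fastest dim, 8.0 for the slowest

Each entry acts as a divisor on one rotary frequency: dimensions whose wavelength fits well inside the original context window keep factor 1, dimensions whose wavelength exceeds it are stretched by the full factor (8.0 here), and the band in between is interpolated smoothly. If gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FREQS] resolves to "rope_freqs" (as in current gguf-py), the converted model stores these factors as a "rope_freqs.weight" tensor.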
