Skip to content

Commit 2dd5d2c

Browse files
authored
convert-llama-h5-to-gguf.py : add 70b gqa support
1 parent ca47582 commit 2dd5d2c

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

convert-llama-h5-to-gguf.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# HF llama --> gguf conversion, GQA/70b not supported
1+
# HF llama --> gguf conversion
22

33
import gguf
44
import gguf_namemap as tmap
@@ -10,19 +10,19 @@
1010
import numpy as np
1111
import torch
1212

13-
from typing import Any, List
13+
from typing import Any, List, Optional
1414
from pathlib import Path
1515
from sentencepiece import SentencePieceProcessor
1616

1717
#NDArray = np.ndarray[Any, Any]
1818
# compatible with python < 3.9
1919
NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
2020

def permute(weights: 'NDArray', n_head: int, n_kv_head: Optional[int] = None) -> 'NDArray':
    """Regroup the rows of an attention projection weight, head by head.

    Axis 0 of ``weights`` is split into ``heads`` equal groups. Within each
    group the first and second half of the rows are interleaved: the array is
    reshaped to ``(heads, 2, rows_per_head // 2, ...)``, axes 1 and 2 are
    swapped, and the result is reshaped back to the original shape.

    Args:
        weights: Array whose first dimension is divisible by ``2 * heads``.
        n_head: Number of attention heads.
        n_kv_head: Number of key/value heads. When given and different from
            ``n_head`` (grouped-query attention), the rows are grouped by
            ``n_kv_head`` instead of ``n_head``.

    Returns:
        An array with the same shape as ``weights`` and rows reordered.

    Raises:
        ValueError: Propagated from ``reshape`` when axis 0 cannot be split
            into ``(heads, 2, rows // heads // 2)``.

    NOTE(review): presumably only the K projection of a GQA model has
    ``n_kv_head`` heads; passing ``n_kv_head`` for a Q projection would group
    it by the wrong head count -- confirm at the call sites.
    """
    # With grouped-query attention the tensor holds n_kv_head heads, so that
    # is the group count to use. Dividing n_head by n_kv_head yields the
    # query-to-kv ratio, which only equals n_kv_head when
    # n_head == n_kv_head ** 2 (e.g. 64 and 8).
    heads = n_head
    if n_kv_head is not None and n_kv_head != n_head:
        heads = n_kv_head

    rows_per_half = weights.shape[0] // heads // 2
    grouped = weights.reshape(heads, 2, rows_per_half, *weights.shape[1:])
    return grouped.swapaxes(1, 2).reshape(weights.shape)
2626

2727
def count_model_parts(dir_model: str) -> int:
2828
num_parts = 0
@@ -220,7 +220,7 @@ def count_model_parts(dir_model: str) -> int:
220220

221221
# permute these
222222
if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
223-
data = permute(data,head_count)
223+
data = permute(data, head_count, head_count_kv)
224224

225225
# map tensor names
226226
if name.endswith(".weight") and name[:-7] in tensor_map:
@@ -289,7 +289,7 @@ def count_model_parts(dir_model: str) -> int:
289289

290290
# permute these
291291
if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
292-
data = permute(data, head_count)
292+
data = permute(data, head_count, head_count_kv)
293293

294294
# map tensor names
295295
if name.endswith(".weight") and name[:-7] in tensor_map:

0 commit comments

Comments
 (0)