Skip to content

Commit d5c8fcf

Browse files
authored
convert.py : 70b model working (change attn_q permute)
1 parent 7de7cb4 commit d5c8fcf

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

convert.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ def __repr__(self) -> str:
326326
#
327327

328328
def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
329+
#print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
329330
if n_head_kv is not None and n_head != n_head_kv:
330331
n_head //= n_head_kv
331332
return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
@@ -818,12 +819,12 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
818819
for i in itertools.count():
819820
if f"model.layers.{i}.self_attn.q_proj.weight" in model:
820821
print(f"Permuting layer {i}")
821-
tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv)
822+
tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
822823
tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
823824
#tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
824825
elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
825826
print(f"Unpacking and permuting layer {i}")
826-
tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head_kv)
827+
tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
827828
tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
828829
tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
829830
else:

0 commit comments

Comments (0)