Commit 673ae1a

convert-new.py : convert script now works

1 parent 7eaa315 commit 673ae1a

4 files changed: +44 -36 lines changed

convert-llama-7b-pth-to-gguf.py

Lines changed: 1 addition & 1 deletion
@@ -298,7 +298,7 @@ def count_model_parts(dir_model: str) -> int:
 
     print( name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-    gguf_writer.write_tensor_to_file(data)
+    gguf_writer.write_tensor_data(data)
 
 gguf_writer.close()
 
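Only the call site changes in this file: the script's per-tensor loop now ends in write_tensor_data. A minimal sketch of that pattern, assuming an already-constructed gguf.GGUFWriter whose header, KV data, and tensor infos have been written (the helper name and tensors dict are hypothetical; the real script's model loading and dtype conversion are elided):

# Hypothetical helper illustrating the renamed call; gguf_writer is assumed to
# be a gguf.GGUFWriter, and tensors maps names to numpy arrays.
def write_all_tensors(gguf_writer, tensors) -> None:
    for name, data in tensors.items():
        print(name + ", shape " + str(len(data.shape)) + ", dtype " + str(data.dtype))
        gguf_writer.write_tensor_data(data)  # was: gguf_writer.write_tensor_to_file(data)
    gguf_writer.close()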
convert-new.py

Lines changed: 31 additions & 10 deletions
@@ -669,7 +669,6 @@ def lazy_load_file(path: Path) -> ModelPlus:
 In = TypeVar('In')
 Out = TypeVar('Out')
 
-
 def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int) -> Iterable[Out]:
     '''Parallel map, but with backpressure. If the caller doesn't call `next`
     fast enough, this will stop calling `func` at some point rather than
@@ -734,42 +733,67 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
 
         # TODO: added / special tokens
 
+    def add_tensor_info(self, name: str, tensor: LazyTensor) -> None:
+        n_elements = 1
+        for dim in tensor.shape:
+            n_elements *= dim
+        data_type = DATA_TYPE_TO_NUMPY[tensor.data_type]
+        data_nbytes = n_elements * data_type.itemsize
+        self.gguf.add_tensor_info(name, tensor.shape, data_type, data_nbytes)
+
     def write_meta(self) -> None:
         self.gguf.write_header_to_file()
         self.gguf.write_kv_data_to_file()
 
+    def write_tensor_info(self) -> None:
+        self.gguf.write_ti_data_to_file()
+
     def close(self) -> None:
         self.gguf.close()
 
     @staticmethod
     def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab) -> None:
+        check_vocab_size(params, vocab)
+
         of = OutputFile(fname_out)
+
+        # meta data
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
         of.write_meta()
+
         of.close()
 
     @staticmethod
     def write_all(fname_out: Path, params: Params, model: LazyModel, vocab: Vocab) -> None:
         check_vocab_size(params, vocab)
 
         of = OutputFile(fname_out)
+
+        # meta data
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
 
+        # tensor info
+        for name, lazy_tensor in model.items():
+            of.add_tensor_info(name, lazy_tensor)
+
+        of.write_meta()
+        of.write_tensor_info()
+
         def do_item(item: Tuple[str, LazyTensor]) -> NDArray:
             name, lazy_tensor = item
             return lazy_tensor.load().to_ggml().ndarray
 
+        # tensor data
         ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8)
         for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
-            #of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
-            ndarray.tofile(of.fout)
-        of.fout.close()
+            of.gguf.write_tensor_data(ndarray)
 
+        of.close()
 
 def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFileType:
     wq_type = model[NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type
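The new add_tensor_info computes each tensor's byte size and registers it with the underlying GGUFWriter before any data is written. A worked example of that size computation; the shape is a made-up stand-in, and np.float16 is an assumed result of the DATA_TYPE_TO_NUMPY lookup for an f16 tensor:

import numpy as np

shape = (32000, 4096)             # hypothetical token-embedding shape
data_type = np.dtype(np.float16)  # assumed DATA_TYPE_TO_NUMPY result

n_elements = 1
for dim in shape:
    n_elements *= dim             # 32000 * 4096 = 131072000 elements

data_nbytes = n_elements * data_type.itemsize
print(data_nbytes)                # 131072000 * 2 = 262144000 bytes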
@@ -783,6 +807,9 @@ def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFi
 
     raise Exception(f"Unexpected combination of types: {name_to_type}")
 
+def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
+    return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
+            for (name, tensor) in model.items()}
 
 def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
     tmap = gguf.get_tensor_name_map(ARCH, params.n_layer)
@@ -808,12 +835,6 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
 
     return out
 
-
-def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
-    return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
-            for (name, tensor) in model.items()}
-
-
 def nth_multifile_path(path: Path, n: int) -> Optional[Path]:
     '''Given any path belonging to a multi-file model (e.g. foo.bin.1), return
     the nth path in the model.
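The structural point of the write_all changes above is ordering: GGUF stores the tensor-info table ahead of the tensor data, so every add_tensor_info call must complete before the first tensor is streamed. A toy stand-in that only records call order (the real OutputFile and GGUFWriter are not reimplemented here):

class OrderLog:
    # Records the order of writer calls; no file I/O.
    def __init__(self) -> None:
        self.calls = []

    def add_tensor_info(self, name: str) -> None:
        self.calls.append("info:" + name)

    def write_meta(self) -> None:
        self.calls.append("meta")

    def write_tensor_info(self) -> None:
        self.calls.append("ti-table")

    def write_tensor_data(self, name: str) -> None:
        self.calls.append("data:" + name)

of = OrderLog()
model = ["token_embd.weight", "output.weight"]  # stand-in tensor names
for name in model:          # pass 1: register every tensor's info
    of.add_tensor_info(name)
of.write_meta()             # header + KV metadata
of.write_tensor_info()      # flush the tensor-info table
for name in model:          # pass 2: stream the actual data
    of.write_tensor_data(name)
print(of.calls)             # every "info:*" entry precedes every "data:*" entry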

gguf.py

Lines changed: 8 additions & 25 deletions
@@ -70,23 +70,6 @@
 # recommended mapping of model tensor names for storage in gguf
 #
 
-#LLAMA_TOKEN_EMBD = "token_embd"
-#LLAMA_OUTPUT_NORM = "output_norm"
-#LLAMA_OUTPUT = "output"
-#LLAMA_ATTN_NORM = "blk.{bid}.attn_norm"
-#LLAMA_ATTN_Q = "blk.{bid}.attn_q"
-#LLAMA_ATTN_K = "blk.{bid}.attn_k"
-#LLAMA_ATTN_V = "blk.{bid}.attn_v"
-#LLAMA_ATTN_OUTPUT = "blk.{bid}.attn_output"
-#LLAMA_FFN_NORM = "blk.{bid}.ffn_norm"
-#LLAMA_FFN_GATE = "blk.{bid}.ffn_gate"
-#LLAMA_FFN_DOWN = "blk.{bid}.ffn_down"
-#LLAMA_FFN_UP = "blk.{bid}.ffn_up"
-#
-#GPT_POS_EMBD = "pos_embd"
-#
-#FALCON_ATTN_NORM_2 = "blk.{bid}.attn_norm_2"
-
 class MODEL_ARCH(IntEnum):
     LLAMA = auto()
     FALCON = auto()
@@ -117,15 +100,15 @@ class MODEL_TENSOR(IntEnum):
 MODEL_ARCH_NAMES = {
     MODEL_ARCH.LLAMA : "llama",
     MODEL_ARCH.FALCON : "falcon",
-    MODEL_ARCH.GPT2 : "gpt-2",
-    MODEL_ARCH.GPTJ : "gpt-j",
-    MODEL_ARCH.GPTNEOX : "gpt-neox",
+    MODEL_ARCH.GPT2 : "gpt2",
+    MODEL_ARCH.GPTJ : "gptj",
+    MODEL_ARCH.GPTNEOX : "gptneox",
     MODEL_ARCH.MPT : "mpt",
 }
 
 MODEL_TENSOR_NAMES = {
     MODEL_ARCH.LLAMA : {
-        MODEL_TENSOR.TOKEN_EMBD : "tok_embd",
+        MODEL_TENSOR.TOKEN_EMBD : "token_embd",
         MODEL_TENSOR.OUTPUT_NORM : "output_norm",
         MODEL_TENSOR.OUTPUT : "output",
         MODEL_TENSOR.ROPE_FREQS : "rope_freqs",
@@ -141,7 +124,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP : "blk.{bid}.ffn_up",
     },
     MODEL_ARCH.FALCON : {
-        MODEL_TENSOR.TOKEN_EMBD : "tok_embd",
+        MODEL_TENSOR.TOKEN_EMBD : "token_embd",
         MODEL_TENSOR.OUTPUT_NORM : "output_norm",
         MODEL_TENSOR.OUTPUT : "output",
         MODEL_TENSOR.ATTN_NORM : "blk.{bid}.attn_norm",
@@ -495,7 +478,7 @@ def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1
 
-    def write_tensor_to_file(self, tensor: np.ndarray):
+    def write_tensor_data(self, tensor: np.ndarray):
         pad = GGUFWriter.ggml_pad(self.fout.tell(), self.data_alignment) - self.fout.tell()
         if pad != 0:
             self.fout.write(bytes([0] * pad))
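write_tensor_data pads the file up to the writer's data alignment before writing each tensor. The ggml_pad below reimplements the usual round-up-to-a-multiple formula for illustration, and the offset and alignment values are made up:

def ggml_pad(x: int, n: int) -> int:
    # round x up to the next multiple of n
    return ((x + n - 1) // n) * n

offset = 1000      # hypothetical current file offset
alignment = 32     # assumed data_alignment
pad = ggml_pad(offset, alignment) - offset
print(pad)         # 1024 - 1000 = 24 zero bytes precede the tensor data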
@@ -650,7 +633,7 @@ def add_pad_token_id(self, id: int):
     gguf_writer.write_header_to_file()
     gguf_writer.write_kv_data_to_file()
     gguf_writer.write_ti_data_to_file()
-    gguf_writer.write_tensor_to_file(tensor1)
-    gguf_writer.write_tensor_to_file(tensor2)
+    gguf_writer.write_tensor_data(tensor1)
+    gguf_writer.write_tensor_data(tensor2)
 
     gguf_writer.close()

llama.cpp

Lines changed: 4 additions & 0 deletions
@@ -1115,6 +1115,10 @@ struct llama_model_loader {
     struct ggml_tensor * create_tensor(struct ggml_context * ctx, const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
         struct ggml_tensor * cur = ggml_get_tensor(ctx_meta, name.c_str());
 
+        if (cur == NULL) {
+            throw std::runtime_error(format("%s: tensor '%s' not found", __func__, name.c_str()));
+        }
+
         {
             bool is_ok = true;
             for (size_t i = 0; i < ne.size(); ++i) {
