
Commit aa26201

also support loading from llama2.c vocabulary
1 parent d2b95e7 commit aa26201

File tree

1 file changed: +57 -25 lines changed


examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 57 additions & 25 deletions
@@ -438,6 +438,11 @@ struct llama_file {
         read_raw(&ret, sizeof(ret));
         return ret;
     }
+    std::float_t read_f32() {
+        std::float_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
 
     std::string read_string(std::uint32_t len) {
         std::vector<char> chars(len);
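
The new read_f32() mirrors the existing read_u32() helper: read_raw() pulls sizeof(float) raw bytes from the file into the return slot, so a token score stored by llama2.c as a raw little-endian float32 comes back directly (std::float_t from <cmath> is float on common platforms). A minimal standalone sketch of the same contract over a plain FILE*, with an illustrative name not taken from the commit:

    #include <cstdio>
    #include <stdexcept>

    // Illustrative only: read one raw float32, as read_f32() does via read_raw().
    // Assumes the host is little-endian, matching the llama2.c export format.
    static float demo_read_f32(std::FILE * f) {
        float ret;
        if (std::fread(&ret, sizeof(ret), 1, f) != 1) {
            throw std::runtime_error("unexpectedly reached end of file");
        }
        return ret;
    }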
@@ -491,30 +496,57 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     file->write_raw(tensor->data, ggml_nbytes(tensor));
 }
 
-void load_vocab(const char *filename, struct llama_vocab *vocab) {
-    struct llama_context_params llama_params = llama_context_default_params();
-    llama_params.vocab_only = true;
-
-    struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
-    struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
-
-    std::vector<const char *> strings;
-    std::vector<float> scores;
-    int n_vocab = llama_n_vocab(lctx);
-    strings.resize(n_vocab, NULL);
-    scores.resize(n_vocab, 0);
-    n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
-    GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
-    vocab->id_to_token.resize(n_vocab);
-    for (int i=0; i<n_vocab; ++i) {
-        std::string tok = std::string(strings[i]);
-        float score = scores[i];
-        vocab->id_to_token[i].tok = tok;
-        vocab->id_to_token[i].score = score;
-        vocab->token_to_id.emplace(tok, i);
+bool is_ggml_file(const char *filename) {
+    llama_file file(filename, "rb");
+    if (file.size < 4) {
+        return false;
+    }
+    uint32_t magic = file.read_u32();
+    return magic == LLAMA_FILE_MAGIC;
+}
+
+void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
+    // heuristic to infer whether vocab is from ggml or from llama2.c vocabulary
+    if (is_ggml_file(filename)) {
+
+        struct llama_context_params llama_params = llama_context_default_params();
+        llama_params.vocab_only = true;
+
+        struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
+        struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
+
+        std::vector<const char *> strings;
+        std::vector<float> scores;
+        int n_vocab = llama_n_vocab(lctx);
+        strings.resize(n_vocab, NULL);
+        scores.resize(n_vocab, 0);
+        n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
+        GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
+        vocab->id_to_token.resize(n_vocab);
+        for (int i=0; i<n_vocab; ++i) {
+            std::string tok = std::string(strings[i]);
+            float score = scores[i];
+            vocab->id_to_token[i].tok = tok;
+            vocab->id_to_token[i].score = score;
+            vocab->token_to_id.emplace(tok, i);
+        }
+        llama_free(lctx);
+        llama_free_model(lmodel);
+    } else { // assume llama2.c vocabulary
+        printf("Assuming llama2.c vocabulary since %s is not a ggml file\n", filename);
+        llama_file file(filename, "rb");
+        uint32_t n_vocab = config->vocab_size;
+        /* uint32_t max_token_length = */ file.read_u32(); // unused
+        vocab->id_to_token.resize(n_vocab);
+        for (uint32_t i=0; i<n_vocab; ++i) {
+            float_t score = file.read_f32();
+            uint32_t len = file.read_u32();
+            std::string tok = file.read_string(len);
+            vocab->id_to_token[i].tok = tok;
+            vocab->id_to_token[i].score = score;
+            vocab->token_to_id.emplace(tok, i);
+        }
     }
-    llama_free(lctx);
-    llama_free_model(lmodel);
 }
 
 void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * karpathy_weights){
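
Read bottom-up, the new else branch doubles as documentation of the llama2.c tokenizer file layout: a u32 max_token_length header, followed by vocab_size records of { f32 score, u32 len, len bytes of token text }, with vocab_size supplied by the model's Config rather than stored in the vocabulary file itself. A self-contained parser sketch for that layout, inferred from the loop above (names are hypothetical; fread error handling elided for brevity):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct tok_entry { std::string tok; float score; };

    // Parse the layout implied by load_vocab()'s llama2.c branch:
    //   u32 max_token_length, then vocab_size x { f32 score, u32 len, len bytes }.
    static std::vector<tok_entry> read_llama2c_vocab(const char * path, uint32_t vocab_size) {
        std::vector<tok_entry> entries(vocab_size);
        std::FILE * f = std::fopen(path, "rb");
        if (!f) {
            return {};
        }
        uint32_t max_token_length = 0;
        std::fread(&max_token_length, sizeof(max_token_length), 1, f); // unused, as above
        for (uint32_t i = 0; i < vocab_size; ++i) {
            uint32_t len = 0;
            std::fread(&entries[i].score, sizeof(float), 1, f);
            std::fread(&len, sizeof(len), 1, f);
            entries[i].tok.resize(len);
            if (len > 0) {
                std::fread(&entries[i].tok[0], 1, len, f);
            }
        }
        std::fclose(f);
        return entries;
    }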
@@ -684,7 +716,7 @@ void print_usage(int /*argc*/, char ** argv, const struct train_params * params)
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
     fprintf(stderr, "  -h, --help show this help message and exit\n");
-    fprintf(stderr, "  --copy-vocab-from-model FNAME model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
+    fprintf(stderr, "  --copy-vocab-from-model FNAME llama2.c vocabulary or ggml model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
     fprintf(stderr, "  --llama2c-model FNAME [REQUIRED] model path from which to load Karpathy's llama2.c model\n");
     fprintf(stderr, "  --llama2c-output-model FNAME model path to save the converted llama2.c model (default %s')\n", params->fn_llama2c_output_model);
     fprintf(stderr, "\n");
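
With this change the flag accepts either kind of file; an illustrative invocation, assuming the example binary is named after its directory and using placeholder file names:

    ./convert-llama2c-to-ggml \
        --copy-vocab-from-model tokenizer.bin \
        --llama2c-model stories42M.bin \
        --llama2c-output-model stories42M.ggml.bin

If tokenizer.bin does not carry the ggml magic, load_vocab() now falls through to the llama2.c parser instead of attempting (and failing) a ggml model load.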
@@ -764,7 +796,7 @@ int main(int argc, char ** argv) {
     }
 
     struct llama_vocab vocab;
-    load_vocab(params.fn_vocab_model, &vocab);
+    load_vocab(params.fn_vocab_model, &config, &vocab);
 
     struct my_llama_model model;
     model.hparams.n_vocab = config.vocab_size; //llama_n_vocab(lctx);
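
The extra argument exists because the llama2.c branch of load_vocab() has no token count in the vocabulary file and must take it from config->vocab_size. For orientation, the Config in this example mirrors the model header of llama2.c's run.c; a sketch based on that layout (the struct's actual definition sits earlier in this file and is not part of this diff):

    // Sketch of llama2.c's model header; fields are read in this exact order.
    struct Config {
        int dim;        // transformer embedding dimension
        int hidden_dim; // feed-forward hidden dimension
        int n_layers;   // number of transformer layers
        int n_heads;    // number of attention query heads
        int n_kv_heads; // number of key/value heads
        int vocab_size; // vocabulary size; what load_vocab() consumes above
        int seq_len;    // maximum sequence length
    };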
