Commit c2008b5

hparams : remove n_vocab

1 parent 0f02297 commit c2008b5

File tree

4 files changed: +13 -17 lines

  src/llama-context.cpp
  src/llama-hparams.h
  src/llama-model.cpp
  src/llama.cpp

src/llama-context.cpp

Lines changed: 5 additions & 4 deletions

@@ -469,11 +469,12 @@ void llama_set_inputs(llama_context & lctx, const llama_ubatch & ubatch) {
 size_t llama_output_reserve(struct llama_context & lctx, size_t n_outputs) {
     const auto & cparams = lctx.cparams;
     const auto & hparams = lctx.model.hparams;
+    const auto & vocab   = lctx.model.vocab;
 
     const size_t n_outputs_max = std::max(n_outputs, (size_t) cparams.n_seq_max);
 
     const auto n_batch = cparams.n_batch;
-    const auto n_vocab = hparams.n_vocab;
+    const auto n_vocab = vocab.n_vocab();
     const auto n_embd  = hparams.n_embd;
 
     // TODO: use a per-batch flag for logits presence instead

@@ -540,7 +541,7 @@ size_t llama_output_reserve(struct llama_context & lctx, size_t n_outputs) {
 void llama_output_reorder(struct llama_context & ctx) {
     std::vector<size_t> & out_ids = ctx.sbatch.out_ids;
     if (!out_ids.empty()) {
-        const uint32_t n_vocab = ctx.model.hparams.n_vocab;
+        const uint32_t n_vocab = ctx.model.vocab.n_vocab();
         const uint32_t n_embd  = ctx.model.hparams.n_embd;
 
         const int32_t n_outputs = ctx.n_outputs;

@@ -724,7 +725,7 @@ float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
             throw std::runtime_error(format("corrupt output buffer (j=%d, n_outputs=%d)", j, ctx->n_outputs));
         }
 
-        return ctx->logits + j*ctx->model.hparams.n_vocab;
+        return ctx->logits + j*ctx->model.vocab.n_vocab();
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: invalid logits id %d, reason: %s\n", __func__, i, err.what());
 #ifndef NDEBUG

@@ -884,7 +885,7 @@ struct llama_data_write {
     }
 
     void write_logits(const struct llama_context * ctx) {
-        const uint64_t logits_size = std::min((uint64_t) ctx->logits_size, (uint64_t) ctx->n_outputs * ctx->model.hparams.n_vocab);
+        const uint64_t logits_size = std::min((uint64_t) ctx->logits_size, (uint64_t) ctx->n_outputs * ctx->model.vocab.n_vocab());
 
         write(&logits_size, sizeof(logits_size));
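
Each call site above uses the vocab size as the row stride of the logits buffer (one float per token per output row); the only change is that the stride now comes from the vocab object rather than a cached hparams field. Below is a minimal, self-contained sketch of that indexing pattern, using hypothetical toy_vocab/toy_context types rather than the real llama.cpp structures:

// Illustrative sketch only; toy_vocab and toy_context are hypothetical
// stand-ins, not llama.cpp types. The logits buffer holds n_outputs rows of
// n_vocab floats, and the stride is queried from the vocab object.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

struct toy_vocab {
    uint32_t n_tokens = 0;
    uint32_t n_vocab() const { return n_tokens; } // single source of truth
};

struct toy_context {
    toy_vocab          vocab;
    std::vector<float> logits; // n_outputs * vocab.n_vocab() floats
};

// Return a pointer to the logits row of output j (same idea as llama_get_logits_ith).
static float * logits_row(toy_context & ctx, uint32_t j) {
    return ctx.logits.data() + (std::size_t) j * ctx.vocab.n_vocab();
}

int main() {
    toy_context ctx;
    ctx.vocab.n_tokens = 4;
    ctx.logits.assign(3 * ctx.vocab.n_vocab(), 0.0f); // 3 output rows
    logits_row(ctx, 2)[1] = 1.5f;                     // token 1 of output 2
    std::printf("logits[2][1] = %.1f\n", logits_row(ctx, 2)[1]);
    return 0;
}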

src/llama-hparams.h

Lines changed: 0 additions & 1 deletion

@@ -30,7 +30,6 @@ struct llama_hparams {
     bool use_par_res;
     bool swin_norm;
 
-    uint32_t n_vocab = 0;
     uint32_t n_ctx_train; // context size the model was trained on
     uint32_t n_embd;
     uint32_t n_embd_features = 0;

src/llama-model.cpp

Lines changed: 6 additions & 6 deletions

@@ -402,9 +402,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     // get general kv
     ml.get_key(LLM_KV_GENERAL_NAME, name, false);
 
-    // get hparams kv
-    ml.get_key(LLM_KV_VOCAB_SIZE, hparams.n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, hparams.n_vocab, false);
-
     // everything past this point is not vocab-related
     if (hparams.vocab_only) {
         return;

@@ -500,6 +497,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         hparams.n_embd_head_v = 0;
     }
 
+    uint32_t n_vocab = 0;
+
+    ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);
+
     // arch-specific KVs
     switch (arch) {
         case LLM_ARCH_LLAMA:

@@ -519,7 +520,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 case 26: type = LLM_TYPE_3B; break;
                 case 28: type = LLM_TYPE_3B; break; // Llama 3.2 3B
                 // granite uses a vocab with len 49152
-                case 32: type = hparams.n_vocab == 49152 ? LLM_TYPE_3B : (hparams.n_vocab < 40000 ? LLM_TYPE_7B : LLM_TYPE_8B); break;
+                case 32: type = n_vocab == 49152 ? LLM_TYPE_3B : (n_vocab < 40000 ? LLM_TYPE_7B : LLM_TYPE_8B); break;
                 case 36: type = LLM_TYPE_8B; break; // granite
                 case 40: type = LLM_TYPE_13B; break;
                 case 48: type = LLM_TYPE_34B; break;

@@ -1365,7 +1366,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     const int64_t n_embd_head_v = hparams.n_embd_head_v;
     const int64_t n_ff          = hparams.n_ff();
     const int64_t n_embd_gqa    = n_embd_v_gqa;
-    const int64_t n_vocab       = hparams.n_vocab;
+    const int64_t n_vocab       = vocab.n_vocab();
     const int64_t n_vocab_type  = hparams.n_vocab_type;
     const int64_t n_rot         = hparams.n_rot;
     const int64_t n_expert      = hparams.n_expert;

@@ -3494,7 +3495,6 @@ void llama_model::print_info() const {
 
     // hparams
     LLAMA_LOG_INFO("%s: arch = %s\n", __func__, arch_name().c_str());
-    LLAMA_LOG_INFO("%s: n_vocab (hp) = %u\n", __func__, hparams.n_vocab);
     LLAMA_LOG_INFO("%s: vocab_only = %d\n", __func__, hparams.vocab_only);
 
     if (!hparams.vocab_only) {
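
In load_hparams, the vocab size is now read into a local variable used only by the Llama size heuristic, via the short-circuit idiom get_key(...) || get_arr_n(...): the explicit vocab-size key is tried first, and the tokenizer token list is only counted if that key is absent. The sketch below mirrors that fallback pattern with hypothetical helpers; it is not llama_model_loader's real API:

// Illustrative sketch only (toy_metadata and its getters are hypothetical).
// Demonstrates the "try key, fall back to counting the token list" idiom via
// short-circuit evaluation, with the result in a local variable.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

struct toy_metadata {
    std::optional<uint32_t>         vocab_size_kv;  // explicit size key, if present
    std::optional<std::vector<int>> tokenizer_list; // token list, if present
};

static bool get_vocab_size(const toy_metadata & md, uint32_t & out) {
    if (!md.vocab_size_kv) return false;
    out = *md.vocab_size_kv;
    return true;
}

static bool get_token_count(const toy_metadata & md, uint32_t & out) {
    if (!md.tokenizer_list) return false;
    out = (uint32_t) md.tokenizer_list->size();
    return true;
}

int main() {
    toy_metadata md;
    md.tokenizer_list = std::vector<int>(49152); // no explicit key, only the list

    uint32_t n_vocab = 0;
    // Short-circuit: the second getter only runs if the first one fails.
    get_vocab_size(md, n_vocab) || get_token_count(md, n_vocab);
    std::printf("n_vocab = %u\n", n_vocab);
    return 0;
}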

src/llama.cpp

Lines changed: 2 additions & 6 deletions

@@ -65,11 +65,6 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
     model.load_stats(ml);
     model.print_info();
 
-    if (model.vocab.get_type() != LLAMA_VOCAB_TYPE_NONE &&
-        model.hparams.n_vocab != model.vocab.n_vocab()) {
-        throw std::runtime_error("vocab size mismatch");
-    }
-
     if (params.vocab_only) {
         LLAMA_LOG_INFO("%s: vocab only - skipping tensors\n", __func__);
         return 0;

@@ -8342,6 +8337,7 @@ static int llama_decode_impl(
     const uint32_t n_tokens_all = batch.n_tokens;
 
     const auto & model   = lctx.model;
+    const auto & vocab   = model.vocab;
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;

@@ -8369,7 +8365,7 @@ static int llama_decode_impl(
     llama_kv_slot_restorer kv_slot_restorer(kv_self);
 
     const int64_t n_embd  = hparams.n_embd;
-    const int64_t n_vocab = hparams.n_vocab;
+    const int64_t n_vocab = vocab.n_vocab();
 
     uint32_t n_outputs      = 0;
     uint32_t n_outputs_prev = 0;
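
The removed block at load time guarded against hparams.n_vocab and vocab.n_vocab() drifting apart. With the cached copy gone, consumers such as llama_decode_impl ask the vocab directly, so there is no second value left to mismatch. A minimal sketch of that single-source-of-truth idea, again with a hypothetical toy type rather than the real llama_vocab:

// Illustrative sketch only (toy_vocab is hypothetical). The size is derived
// on demand from the token table itself, so it cannot go stale relative to
// the data it describes; no separate consistency check is needed.
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct toy_vocab {
    std::vector<std::string> id_to_token;
    uint32_t n_vocab() const { return (uint32_t) id_to_token.size(); }
};

int main() {
    toy_vocab vocab;
    vocab.id_to_token = {"<s>", "</s>", "hello", "world"};
    // Loading, decoding, and logits bookkeeping all query the vocab directly
    // instead of trusting a separately stored hparams field.
    std::printf("n_vocab = %u\n", vocab.n_vocab());
    return 0;
}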
