
Commit 0ead1f1

llama : check that all the tensor data is in the model file (ggml-org#6885)
* llama : check that all the tensor data is in the model file
* also check for unsigned overflow
1 parent 5154372 commit 0ead1f1
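
The core of the change is an overflow-aware bounds check on each tensor's data range. Below is a minimal standalone sketch of the same pattern (simplified names; the real code uses llama_file->size, ggml_nbytes(tensor) and llama.cpp's own format() helper):

// Sketch only: simplified stand-in for the check added in this commit.
#include <cstddef>
#include <cstdio>

// True when [offs, offs + nbytes) lies inside a file of file_size bytes.
// The first comparison catches unsigned wraparound: if offs + nbytes
// overflows size_t, the sum is smaller than offs and cannot be a valid end.
static bool tensor_in_bounds(size_t offs, size_t nbytes, size_t file_size) {
    return offs + nbytes >= offs && offs + nbytes <= file_size;
}

int main() {
    const size_t file_size = 1024;
    printf("%d\n", tensor_in_bounds(0,   1024, file_size));     // 1: fits exactly
    printf("%d\n", tensor_in_bounds(512, 1024, file_size));     // 0: runs past EOF
    printf("%d\n", tensor_in_bounds((size_t)-8, 16, file_size)); // 0: sum wraps around
    return 0;
}

The `offs + nbytes < offs` comparison works because unsigned arithmetic wraps modulo 2^N: if the sum overflows size_t it comes out smaller than offs, so a corrupted or oversized tensor cannot slip past the `> file_size` test.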

File tree

1 file changed: +13 -8 lines changed


llama.cpp

Lines changed: 13 additions & 8 deletions
@@ -2999,9 +2999,13 @@ struct llama_model_loader {

         ggml_tensor * tensor;

-        llama_tensor_weight(uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
+        llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
             const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
             offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
+
+            if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
+                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
+            }
         }
     };

     std::vector<llama_tensor_weight> weights;
@@ -3040,15 +3044,15 @@ struct llama_model_loader {
         get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
         llm_kv = LLM_KV(llm_arch_from_string(arch_name));

+        files.emplace_back(new llama_file(fname.c_str(), "rb"));
+        contexts.emplace_back(ctx);
+
         // Save tensors data offset of the main file.
         // For subsidiary files, `meta` tensor data offset must not be used,
         // so we build a unified tensors index for weights.
         for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-            weights.emplace_back(0, cur->name, meta, cur);
+            weights.emplace_back(files.back().get(), 0, cur->name, meta, cur);
         }
-        files.emplace_back(new llama_file(fname.c_str(), "rb"));
-        contexts.emplace_back(ctx);
-
         uint16_t n_split = 0;
         get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);

@@ -3082,12 +3086,13 @@ struct llama_model_loader {
                 throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
             }

+            files.emplace_back(new llama_file(split_path, "rb"));
+            contexts.emplace_back(ctx);
+
             // Save tensors data offset info of the shard.
             for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-                weights.emplace_back(idx, cur->name, ctx_gguf, cur);
+                weights.emplace_back(files.back().get(), idx, cur->name, ctx_gguf, cur);
            }
-            files.emplace_back(new llama_file(split_path, "rb"));
-            contexts.emplace_back(ctx);

             gguf_free(ctx_gguf);
         }
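
The second and third hunks also reorder the loader so that each file is pushed onto `files` before its tensors are indexed; the constructor can then be handed `files.back().get()` and validate offsets against the size of the file the tensor actually lives in. A hedged sketch of that ordering, with placeholder types (`File` and `Weight` are illustrative stand-ins for llama_file and llama_tensor_weight):

// Sketch only: File/Weight are simplified stand-ins, not llama.cpp types.
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <vector>

struct File {
    size_t size; // total file size in bytes
};

struct Weight {
    size_t offs; // tensor data offset within its file

    // Validate at construction time, against the file the tensor belongs to.
    Weight(const File * file, size_t offs, size_t nbytes) : offs(offs) {
        if (offs + nbytes < offs || offs + nbytes > file->size) {
            throw std::runtime_error("tensor data is not within the file bounds");
        }
    }
};

int main() {
    std::vector<std::unique_ptr<File>> files;
    std::vector<Weight>                weights;

    // Open (here: fabricate) the file first, as the commit now does...
    files.emplace_back(new File{4096});

    // ...so each weight being indexed can be checked against files.back().
    weights.emplace_back(files.back().get(), 0, 1024);

    return 0;
}

Passing the file into the constructor keeps the validation next to where `offs` is computed, instead of deferring it to the code that later memory-maps or reads the tensor data.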
