@@ -2999,9 +2999,13 @@ struct llama_model_loader {
 
         ggml_tensor * tensor;
 
-        llama_tensor_weight(uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
+        llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
             const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
             offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
+
+            if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
+                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
+            }
         }
     };
     std::vector<llama_tensor_weight> weights;
@@ -3040,15 +3044,15 @@ struct llama_model_loader {
         get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
         llm_kv = LLM_KV(llm_arch_from_string(arch_name));
 
+        files.emplace_back(new llama_file(fname.c_str(), "rb"));
+        contexts.emplace_back(ctx);
+
         // Save tensors data offset of the main file.
         // For subsidiary files, `meta` tensor data offset must not be used,
         // so we build a unified tensors index for weights.
         for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-            weights.emplace_back(0, cur->name, meta, cur);
+            weights.emplace_back(files.back().get(), 0, cur->name, meta, cur);
         }
-        files.emplace_back(new llama_file(fname.c_str(), "rb"));
-        contexts.emplace_back(ctx);
-
         uint16_t n_split = 0;
         get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);
 
@@ -3082,12 +3086,13 @@ struct llama_model_loader {
                 throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
             }
 
+            files.emplace_back(new llama_file(split_path, "rb"));
+            contexts.emplace_back(ctx);
+
             // Save tensors data offset info of the shard.
             for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-                weights.emplace_back(idx, cur->name, ctx_gguf, cur);
+                weights.emplace_back(files.back().get(), idx, cur->name, ctx_gguf, cur);
            }
-            files.emplace_back(new llama_file(split_path, "rb"));
-            contexts.emplace_back(ctx);
 
             gguf_free(ctx_gguf);
         }
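The guard added to `llama_tensor_weight`'s constructor relies on unsigned wrap-around: `offs` and `ggml_nbytes(tensor)` are both unsigned, so if a corrupted offset plus size overflows, the sum wraps and becomes *smaller* than `offs`, which the first comparison detects; the second comparison rejects tensors that extend past the end of the file. Below is a minimal standalone sketch of the same check; `check_tensor_bounds` is a hypothetical helper for illustration, not part of the patch.

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>

// Sketch of the bounds check the patch adds to llama_tensor_weight's
// constructor. Since size_t arithmetic wraps on overflow, a wrapped sum
// is smaller than the original offset -- the first test catches that.
// The second test rejects a tensor that extends past the end of the file.
static void check_tensor_bounds(size_t offs, size_t nbytes, size_t file_size) {
    if (offs + nbytes < offs || offs + nbytes > file_size) {
        throw std::runtime_error("tensor data is not within the file bounds, model is corrupted or incomplete");
    }
}

int main() {
    check_tensor_bounds(128, 1024, 4096);            // fine: [128, 1152) fits in 4096 bytes
    // check_tensor_bounds(4000, 1024, 4096);        // would throw: extends past end of file
    // check_tensor_bounds(SIZE_MAX - 8, 64, 4096);  // would throw: offs + nbytes wraps around
    return 0;
}
```

This also explains why both later hunks move `files.emplace_back(...)` and `contexts.emplace_back(ctx)` above the loop that builds `weights`: the constructor now dereferences `file->size`, so the corresponding `llama_file` must be opened before any `llama_tensor_weight` is constructed from it.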