Commit 145d315

add --check-tensors command line argument
Tensor validation is disabled by default and can be enabled by adding `--check-tensors` to the command-line arguments. The quantize path always validates tensors.
1 parent c806db3 commit 145d315
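
For callers of the C API (rather than the common command-line helpers), the same behavior is reached through the new `check_tensors` field on `llama_model_params`. Below is a minimal sketch, not part of the commit: the model path and error handling are placeholders for illustration.

```cpp
#include "llama.h"
#include <cstdio>

int main() {
    // Start from the library defaults; check_tensors defaults to false,
    // mirroring the CLI where validation is opt-in.
    llama_model_params mparams = llama_model_default_params();
    mparams.check_tensors = true; // equivalent of passing --check-tensors

    // "model.gguf" is a placeholder path for illustration.
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        // With validation enabled, a tensor containing invalid data makes
        // loading fail instead of silently producing a broken model.
        fprintf(stderr, "failed to load (or validate) model\n");
        return 1;
    }

    llama_free_model(model);
    return 0;
}
```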

File tree: 4 files changed, +22 -11 lines

- common/common.cpp
- common/common.h
- llama.cpp
- llama.h

common/common.cpp

Lines changed: 6 additions & 0 deletions
```diff
@@ -1089,6 +1089,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.n_print = std::stoi(argv[i]);
         return true;
     }
+    if (arg == "--check-tensors") {
+        params.check_tensors = true;
+        return true;
+    }
     if (arg == "--ppl-output-type") {
         if (++i >= argc) {
             invalid_param = true;
@@ -1554,6 +1558,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
     printf(" -ptc N, --print-token-count N\n");
     printf(" print token count every N tokens (default: %d)\n", params.n_print);
+    printf(" --check-tensors check model tensor data for invalid values\n");
     printf("\n");
 #ifndef LOG_DISABLE_LOGS
     log_print_usage();
@@ -1774,6 +1779,7 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
     mparams.tensor_split = params.tensor_split;
     mparams.use_mmap = params.use_mmap;
     mparams.use_mlock = params.use_mlock;
+    mparams.check_tensors = params.check_tensors;
     if (params.kv_overrides.empty()) {
         mparams.kv_overrides = NULL;
     } else {
```

common/common.h

Lines changed: 1 addition & 0 deletions
```diff
@@ -161,6 +161,7 @@ struct gpt_params {
     bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
+    bool check_tensors = false; // validate tensor data
 
     std::string cache_type_k = "f16"; // KV cache data type for the K
     std::string cache_type_v = "f16"; // KV cache data type for the V
```

llama.cpp

Lines changed: 11 additions & 8 deletions
```diff
@@ -2985,6 +2985,7 @@ struct llama_model_loader {
     size_t n_bytes = 0;
 
     bool use_mmap = false;
+    bool check_tensors;
 
     llama_files files;
     llama_ftype ftype;
@@ -3014,7 +3015,7 @@ struct llama_model_loader {
     std::string arch_name;
     LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
 
-    llama_model_loader(const std::string & fname, bool use_mmap, const struct llama_model_kv_override * param_overrides_p) {
+    llama_model_loader(const std::string & fname, bool use_mmap, bool check_tensors, const struct llama_model_kv_override * param_overrides_p) {
         int trace = 0;
         if (getenv("LLAMA_TRACE")) {
             trace = atoi(getenv("LLAMA_TRACE"));
@@ -3218,6 +3219,7 @@ struct llama_model_loader {
         }
 
         this->use_mmap = use_mmap;
+        this->check_tensors = check_tensors;
     }
 
     ~llama_model_loader() {
@@ -3473,7 +3475,7 @@ struct llama_model_loader {
             file->read_raw(cur->data, ggml_nbytes(cur));
         }
 
-        if (!ggml_validate_row_data(cur->type, cur->data, ggml_nbytes(cur))) {
+        if (check_tensors && !ggml_validate_row_data(cur->type, cur->data, ggml_nbytes(cur))) {
            throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
         }
     }
@@ -3514,7 +3516,7 @@ struct llama_model_loader {
                     buf_mmap = bufs_mmap.at(weight->idx);
                 }
 
-                if (!ggml_validate_row_data(cur->type, (uint8_t *) mapping->addr + weight->offs, n_size)) {
+                if (check_tensors && !ggml_validate_row_data(cur->type, (uint8_t *) mapping->addr + weight->offs, n_size)) {
                     throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
                 }
 
@@ -3538,15 +3540,15 @@ struct llama_model_loader {
                 if (ggml_backend_buffer_is_host(cur->buffer)) {
                     file->seek(weight->offs, SEEK_SET);
                     file->read_raw(cur->data, n_size);
-                    if (!ggml_validate_row_data(cur->type, cur->data, n_size)) {
+                    if (check_tensors && !ggml_validate_row_data(cur->type, cur->data, n_size)) {
                         throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
                     }
                 } else {
                     read_buf.resize(n_size);
                     file->seek(weight->offs, SEEK_SET);
                     file->read_raw(read_buf.data(), n_size);
                     ggml_backend_tensor_set(cur, read_buf.data(), 0, n_size);
-                    if (!ggml_validate_row_data(cur->type, read_buf.data(), n_size)) {
+                    if (check_tensors && !ggml_validate_row_data(cur->type, read_buf.data(), n_size)) {
                         throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
                     }
                 }
@@ -5981,7 +5983,7 @@ static bool llm_load_tensors(
 // Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
 static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
     try {
-        llama_model_loader ml(fname, params.use_mmap, params.kv_overrides);
+        llama_model_loader ml(fname, params.use_mmap, params.check_tensors, params.kv_overrides);
 
         model.hparams.vocab_only = params.vocab_only;
 
@@ -14459,7 +14461,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         auto v = (std::vector<llama_model_kv_override>*)params->kv_overrides;
         kv_overrides = v->data();
     }
-    llama_model_loader ml(fname_inp, use_mmap, kv_overrides);
+    llama_model_loader ml(fname_inp, use_mmap, /*check_tensors*/ true, kv_overrides);
     ml.init_mappings(false); // no prefetching
 
     llama_model model;
@@ -14780,7 +14782,7 @@ static int llama_apply_lora_from_file_internal(
     std::unique_ptr<llama_model_loader> ml;
     if (path_base_model) {
         LLAMA_LOG_INFO("%s: loading base model from '%s'\n", __func__, path_base_model);
-        ml.reset(new llama_model_loader(path_base_model, /*use_mmap*/ true, /*kv_overrides*/ nullptr));
+        ml.reset(new llama_model_loader(path_base_model, /*use_mmap*/ true, /*check_tensors*/ false, /*kv_overrides*/ nullptr));
         ml->init_mappings(/*prefetch*/ false); // no prefetching
     }
 
@@ -15039,6 +15041,7 @@ struct llama_model_params llama_model_default_params() {
         /*.vocab_only =*/ false,
         /*.use_mmap =*/ true,
         /*.use_mlock =*/ false,
+        /*.check_tensors =*/ false,
     };
 
 #ifdef GGML_USE_METAL
```
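
The `check_tensors &&` gates added above sit in front of `ggml_validate_row_data`, which scans the raw tensor bytes for values that would poison downstream computation. As a rough, hypothetical illustration only (the real helper also understands the quantized block formats and their scale fields), the f32 case amounts to something like:

```cpp
#include <cmath>
#include <cstddef>

// Simplified sketch of an f32 validity scan: reject any non-finite value.
// Illustrative only; this is not the actual ggml implementation.
static bool validate_f32(const void * data, size_t nbytes) {
    const float * f = static_cast<const float *>(data);
    const size_t  n = nbytes / sizeof(float);
    for (size_t i = 0; i < n; ++i) {
        if (!std::isfinite(f[i])) { // catches NaN, +Inf, -Inf
            return false;
        }
    }
    return true;
}
```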

llama.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,10 @@ extern "C" {
232232
const struct llama_model_kv_override * kv_overrides;
233233

234234
// Keep the booleans together to avoid misalignment during copy-by-value.
235-
bool vocab_only; // only load the vocabulary, no weights
236-
bool use_mmap; // use mmap if possible
237-
bool use_mlock; // force system to keep model in RAM
235+
bool vocab_only; // only load the vocabulary, no weights
236+
bool use_mmap; // use mmap if possible
237+
bool use_mlock; // force system to keep model in RAM
238+
bool check_tensors; // validate model tensor data
238239
};
239240

240241
struct llama_context_params {
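
As noted in the commit message, the quantize path does not consult this flag: `llama_model_quantize_internal` constructs its loader with `/*check_tensors*/ true`, so source tensors are always validated when quantizing. A short sketch of that path from the C API, with placeholder file names:

```cpp
#include "llama.h"
#include <cstdio>

int main() {
    llama_model_quantize_params qparams = llama_model_quantize_default_params();
    qparams.ftype = LLAMA_FTYPE_MOSTLY_Q4_0; // target quantization type

    // Input/output names are placeholders. Validation happens regardless of
    // any caller-side setting; an invalid source tensor aborts quantization.
    const uint32_t rc = llama_model_quantize("model-f16.gguf", "model-q4_0.gguf", &qparams);
    if (rc != 0) {
        fprintf(stderr, "quantization failed\n");
        return 1;
    }
    return 0;
}
```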
