 #include <forward_list>
 #include <fstream>
 #include <functional>
+#include <future>
 #include <initializer_list>
 #include <locale>
 #include <map>
@@ -3494,6 +3495,8 @@ struct llama_model_loader {
         GGML_ASSERT(size_data != 0 && "call init_mappings() first");
 
         std::vector<no_init<uint8_t>> read_buf;
+        std::vector<std::future<std::pair<ggml_tensor *, bool>>> validation_result;
+
         for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur != NULL; cur = ggml_get_next_tensor(ctx, cur)) {
             const auto * weight = get_weight(ggml_get_name(cur));
             if (weight == nullptr) {
@@ -3515,14 +3518,17 @@ struct llama_model_loader {
                 if (bufs_mmap.count(weight->idx)) {
                     buf_mmap = bufs_mmap.at(weight->idx);
                 }
+                uint8_t * data = (uint8_t *) mapping->addr + weight->offs;
 
-                if (check_tensors && !ggml_validate_row_data(cur->type, (uint8_t *) mapping->addr + weight->offs, n_size)) {
-                    throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
+                if (check_tensors) {
+                    validation_result.emplace_back(std::async(std::launch::async, [cur, data, n_size] {
+                        return std::make_pair(cur, ggml_validate_row_data(cur->type, data, n_size));
+                    }));
                 }
 
                 GGML_ASSERT(buf_mmap || cur->data); // either we have a buffer to allocate the tensor in, or it is already allocated
                 if (buf_mmap && cur->data == nullptr) {
-                    ggml_backend_tensor_alloc(buf_mmap, cur, (uint8_t *) mapping->addr + weight->offs);
+                    ggml_backend_tensor_alloc(buf_mmap, cur, data);
                     if (lmlocks) {
                         const auto & lmlock = lmlocks->at(weight->idx);
                         lmlock->grow_to(weight->offs + n_size);
@@ -3532,16 +3538,18 @@ struct llama_model_loader {
                     mmap_used.first  = std::min(mmap_used.first,  weight->offs);
                     mmap_used.second = std::max(mmap_used.second, weight->offs + n_size);
                 } else {
-                    ggml_backend_tensor_set(cur, (uint8_t *) mapping->addr + weight->offs, 0, n_size);
+                    ggml_backend_tensor_set(cur, data, 0, n_size);
                 }
             } else {
                 GGML_ASSERT(weight->idx < files.size());
                 const auto & file = files.at(weight->idx);
                 if (ggml_backend_buffer_is_host(cur->buffer)) {
                     file->seek(weight->offs, SEEK_SET);
                     file->read_raw(cur->data, n_size);
-                    if (check_tensors && !ggml_validate_row_data(cur->type, cur->data, n_size)) {
-                        throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
+                    if (check_tensors) {
+                        validation_result.emplace_back(std::async(std::launch::async, [cur, n_size] {
+                            return std::make_pair(cur, ggml_validate_row_data(cur->type, cur->data, n_size));
+                        }));
                     }
                 } else {
                     read_buf.resize(n_size);
@@ -3557,6 +3565,19 @@ struct llama_model_loader {
             size_done += n_size;
         }
 
+        // check validation results
+        bool validation_failed = false;
+        for (auto & future : validation_result) {
+            auto result = future.get();
+            if (!result.second) {
+                LLAMA_LOG_ERROR("%s: tensor '%s' has invalid data\n", __func__, ggml_get_name(result.first));
+                validation_failed = true;
+            }
+        }
+        if (validation_failed) {
+            throw std::runtime_error("found tensors with invalid data");
+        }
+
         // check if this is the last call and do final cleanup
         if (size_done >= size_data) {
             // unmap offloaded tensors and metadata
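
For reference, the change above swaps the fail-fast inline check for validation tasks that run concurrently with the remaining tensor loads; every failure is reported before a single exception is thrown at the end. A minimal standalone sketch of that pattern, with hypothetical names (validate_buffer stands in for ggml_validate_row_data, and the tensor list is invented), might look like this:

// Sketch only: validate_buffer and the tensor list are placeholders, not llama.cpp API.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <future>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

// Stand-in for ggml_validate_row_data: reject buffers containing NaN/Inf.
static bool validate_buffer(const float * data, std::size_t n) {
    for (std::size_t i = 0; i < n; i++) {
        if (!std::isfinite(data[i])) {
            return false;
        }
    }
    return true;
}

int main() {
    std::vector<std::pair<std::string, std::vector<float>>> tensors = {
        { "tok_embd", { 0.1f, 0.2f, 0.3f } },
        { "output",   { 1.0f, NAN,  2.0f } }, // deliberately corrupt
    };

    // Launch one validation task per tensor; in the loader, the main thread
    // keeps copying tensor data while these run in the background.
    std::vector<std::future<std::pair<std::string, bool>>> validation_result;
    for (const auto & t : tensors) {
        validation_result.emplace_back(std::async(std::launch::async,
            [name = t.first, data = t.second.data(), n = t.second.size()] {
                return std::make_pair(name, validate_buffer(data, n));
            }));
    }

    // Drain every future so all bad tensors are reported, then fail once.
    bool validation_failed = false;
    for (auto & future : validation_result) {
        auto result = future.get();
        if (!result.second) {
            fprintf(stderr, "tensor '%s' has invalid data\n", result.first.c_str());
            validation_failed = true;
        }
    }
    if (validation_failed) {
        throw std::runtime_error("found tensors with invalid data");
    }
    return 0;
}

The futures carry the tensor identity alongside the pass/fail bit, which is why the diff pairs each result with its ggml_tensor pointer: the error message can still name the offending tensor even though the check no longer happens at the read site.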