Skip to content

Commit ea59185

Browse files
committed
validate data asynchronously when possible
ggml-ci
1 parent 55dec7c commit ea59185

File tree

1 file changed

+27
-6
lines changed

1 file changed

+27
-6
lines changed

llama.cpp

Lines changed: 27 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -75,6 +75,7 @@
7575
#include <forward_list>
7676
#include <fstream>
7777
#include <functional>
78+
#include <future>
7879
#include <initializer_list>
7980
#include <locale>
8081
#include <map>
@@ -3494,6 +3495,8 @@ struct llama_model_loader {
34943495
GGML_ASSERT(size_data != 0 && "call init_mappings() first");
34953496

34963497
std::vector<no_init<uint8_t>> read_buf;
3498+
std::vector<std::future<std::pair<ggml_tensor *, bool>>> validation_result;
3499+
34973500
for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur != NULL; cur = ggml_get_next_tensor(ctx, cur)) {
34983501
const auto * weight = get_weight(ggml_get_name(cur));
34993502
if (weight == nullptr) {
@@ -3515,14 +3518,17 @@ struct llama_model_loader {
35153518
if (bufs_mmap.count(weight->idx)) {
35163519
buf_mmap = bufs_mmap.at(weight->idx);
35173520
}
3521+
uint8_t * data = (uint8_t *) mapping->addr + weight->offs;
35183522

3519-
if (check_tensors && !ggml_validate_row_data(cur->type, (uint8_t *) mapping->addr + weight->offs, n_size)) {
3520-
throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
3523+
if (check_tensors) {
3524+
validation_result.emplace_back(std::async(std::launch::async, [cur, data, n_size] {
3525+
return std::make_pair(cur, ggml_validate_row_data(cur->type, data, n_size));
3526+
}));
35213527
}
35223528

35233529
GGML_ASSERT(buf_mmap || cur->data); // either we have a buffer to allocate the tensor in, or it is already allocated
35243530
if (buf_mmap && cur->data == nullptr) {
3525-
ggml_backend_tensor_alloc(buf_mmap, cur, (uint8_t *) mapping->addr + weight->offs);
3531+
ggml_backend_tensor_alloc(buf_mmap, cur, data);
35263532
if (lmlocks) {
35273533
const auto & lmlock = lmlocks->at(weight->idx);
35283534
lmlock->grow_to(weight->offs + n_size);
@@ -3532,16 +3538,18 @@ struct llama_model_loader {
35323538
mmap_used.first = std::min(mmap_used.first, weight->offs);
35333539
mmap_used.second = std::max(mmap_used.second, weight->offs + n_size);
35343540
} else {
3535-
ggml_backend_tensor_set(cur, (uint8_t *) mapping->addr + weight->offs, 0, n_size);
3541+
ggml_backend_tensor_set(cur, data, 0, n_size);
35363542
}
35373543
} else {
35383544
GGML_ASSERT(weight->idx < files.size());
35393545
const auto & file = files.at(weight->idx);
35403546
if (ggml_backend_buffer_is_host(cur->buffer)) {
35413547
file->seek(weight->offs, SEEK_SET);
35423548
file->read_raw(cur->data, n_size);
3543-
if (check_tensors && !ggml_validate_row_data(cur->type, cur->data, n_size)) {
3544-
throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
3549+
if (check_tensors) {
3550+
validation_result.emplace_back(std::async(std::launch::async, [cur, n_size] {
3551+
return std::make_pair(cur, ggml_validate_row_data(cur->type, cur->data, n_size));
3552+
}));
35453553
}
35463554
} else {
35473555
read_buf.resize(n_size);
@@ -3557,6 +3565,19 @@ struct llama_model_loader {
35573565
size_done += n_size;
35583566
}
35593567

3568+
// check validation results
3569+
bool validation_failed = false;
3570+
for (auto & future : validation_result) {
3571+
auto result = future.get();
3572+
if (!result.second) {
3573+
LLAMA_LOG_ERROR("%s: tensor '%s' has invalid data\n", __func__, ggml_get_name(result.first));
3574+
validation_failed = true;
3575+
}
3576+
}
3577+
if (validation_failed) {
3578+
throw std::runtime_error("found tensors with invalid data");
3579+
}
3580+
35603581
// check if this is the last call and do final cleanup
35613582
if (size_done >= size_data) {
35623583
// unmap offloaded tensors and metadata

0 commit comments

Comments (0)