Commit cf4fa0c

quantize : validate generated data
1 parent 145d315

File tree

1 file changed, +25 −6 lines

llama.cpp
@@ -14368,14 +14368,20 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
 }
 
 static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
-    std::mutex mutex;
-    int64_t counter = 0;
-    size_t new_size = 0;
     if (nthread < 2) {
         // single-thread
-        return ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
+        size_t new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
+        if (!ggml_validate_row_data(new_type, new_data, new_size)) {
+            throw std::runtime_error("quantized data validation failed");
+        }
+        return new_size;
     }
-    auto compute = [&mutex, &counter, &new_size, new_type, f32_data, new_data, chunk_size,
+
+    std::mutex mutex;
+    int64_t counter = 0;
+    size_t new_size = 0;
+    bool valid = true;
+    auto compute = [&mutex, &counter, &new_size, &valid, new_type, f32_data, new_data, chunk_size,
             nrows, n_per_row, imatrix]() {
         const int64_t nrows_per_chunk = chunk_size / n_per_row;
         size_t local_size = 0;
@@ -14390,7 +14396,17 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
             }
             lock.unlock();
             const int64_t this_nrow = std::min(nrows - first_row, nrows_per_chunk);
-            local_size += ggml_quantize_chunk(new_type, f32_data, new_data, first_row * n_per_row, this_nrow, n_per_row, imatrix);
+            size_t this_size = ggml_quantize_chunk(new_type, f32_data, new_data, first_row * n_per_row, this_nrow, n_per_row, imatrix);
+            local_size += this_size;
+
+            // validate the quantized data
+            const size_t row_size = ggml_row_size(new_type, n_per_row);
+            void * this_data = (char *) new_data + first_row * row_size;
+            if (!ggml_validate_row_data(new_type, this_data, this_size)) {
+                std::unique_lock<std::mutex> lock(mutex);
+                valid = false;
+                break;
+            }
         }
     };
     for (int it = 0; it < nthread - 1; ++it) {
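
(Aside: the offset arithmetic in the hunk above works because every quantized row of a given type occupies a fixed number of bytes, so each worker can locate and validate exactly the region it wrote without any coordination. A minimal sketch of that addressing, with a made-up row size standing in for a real ggml type:)

#include <cstddef>
#include <cstdio>

int main() {
    // hypothetical stand-in for ggml_row_size(new_type, n_per_row):
    // every quantized row of a given type has a fixed byte size
    const size_t row_size  = 18;   // e.g. one block-quantized row

    // a worker handed rows [first_row, first_row + this_nrow) can compute
    // its output region independently of all other workers
    const size_t first_row = 128;
    const size_t this_nrow = 32;
    const size_t offset    = first_row * row_size;  // where its region starts
    const size_t this_size = this_nrow * row_size;  // how many bytes it wrote

    std::printf("validate %zu bytes at offset %zu\n", this_size, offset);
    return 0;
}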
@@ -14399,6 +14415,9 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
     compute();
     for (auto & w : workers) { w.join(); }
     workers.clear();
+    if (!valid) {
+        throw std::runtime_error("quantized data validation failed");
+    }
     return new_size;
 }
 
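The multi-threaded control flow above follows a pattern worth noting: each worker validates the chunk it just produced, records failure in a shared flag under the mutex, and stops, while the coordinating thread joins all workers before throwing, so the exception never unwinds while other threads are still running. Below is a self-contained sketch of that pattern; process_chunk and validate_chunk are hypothetical stand-ins for ggml_quantize_chunk and ggml_validate_row_data, and unlike the commit, this sketch also lets still-running workers drain early once the flag is down:

#include <cstddef>
#include <cstdio>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <vector>

// hypothetical stand-ins for ggml_quantize_chunk / ggml_validate_row_data
static size_t process_chunk(int /*chunk*/) { return 64;        }  // pretend each chunk emits 64 bytes
static bool   validate_chunk(int chunk)    { return chunk != 3; } // pretend chunk 3 is corrupt

static size_t process_all(int nchunks, int nthread) {
    std::mutex mutex;
    int    counter  = 0;     // next chunk to hand out; guarded by mutex
    size_t new_size = 0;     // total bytes produced;  guarded by mutex
    bool   valid    = true;  // cleared by any worker that sees bad data

    auto compute = [&]() {
        size_t local_size = 0;
        while (true) {
            std::unique_lock<std::mutex> lock(mutex);
            const int chunk = counter++;
            if (chunk >= nchunks || !valid) {   // out of work, or someone failed
                new_size += local_size;
                break;
            }
            lock.unlock();

            local_size += process_chunk(chunk);

            // validate what we just produced; on failure, record it under
            // the mutex and stop this worker (new_size no longer matters,
            // since the coordinator will throw)
            if (!validate_chunk(chunk)) {
                std::unique_lock<std::mutex> fail_lock(mutex);
                valid = false;
                break;
            }
        }
    };

    std::vector<std::thread> workers;
    for (int it = 0; it < nthread - 1; ++it) {
        workers.emplace_back(compute);
    }
    compute();                              // the calling thread participates too
    for (auto & w : workers) { w.join(); }

    // throw only after every worker has stopped, so no thread is still
    // touching shared state while the exception unwinds
    if (!valid) {
        throw std::runtime_error("validation failed");
    }
    return new_size;
}

int main() {
    try {
        std::printf("total: %zu\n", process_all(/*nchunks=*/8, /*nthread=*/4));
    } catch (const std::exception & e) {
        std::printf("error: %s\n", e.what());
    }
    return 0;
}

Throwing from inside a worker instead would not work: an exception escaping a std::thread's function calls std::terminate, which is why the set-a-flag-then-throw-after-join shape is used here.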