@@ -14368,14 +14368,20 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
 }
 
 static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
-    std::mutex mutex;
-    int64_t counter = 0;
-    size_t new_size = 0;
     if (nthread < 2) {
         // single-thread
-        return ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
+        size_t new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
+        if (!ggml_validate_row_data(new_type, new_data, new_size)) {
+            throw std::runtime_error("quantized data validation failed");
+        }
+        return new_size;
     }
-    auto compute = [&mutex, &counter, &new_size, new_type, f32_data, new_data, chunk_size,
+
+    std::mutex mutex;
+    int64_t counter = 0;
+    size_t new_size = 0;
+    bool valid = true;
+    auto compute = [&mutex, &counter, &new_size, &valid, new_type, f32_data, new_data, chunk_size,
             nrows, n_per_row, imatrix]() {
         const int64_t nrows_per_chunk = chunk_size / n_per_row;
         size_t local_size = 0;
@@ -14390,7 +14396,17 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
             }
             lock.unlock();
             const int64_t this_nrow = std::min(nrows - first_row, nrows_per_chunk);
-            local_size += ggml_quantize_chunk(new_type, f32_data, new_data, first_row * n_per_row, this_nrow, n_per_row, imatrix);
+            size_t this_size = ggml_quantize_chunk(new_type, f32_data, new_data, first_row * n_per_row, this_nrow, n_per_row, imatrix);
+            local_size += this_size;
+
+            // validate the quantized data
+            const size_t row_size = ggml_row_size(new_type, n_per_row);
+            void * this_data = (char *) new_data + first_row * row_size;
+            if (!ggml_validate_row_data(new_type, this_data, this_size)) {
+                std::unique_lock<std::mutex> lock(mutex);
+                valid = false;
+                break;
+            }
         }
     };
     for (int it = 0; it < nthread - 1; ++it) {
@@ -14399,6 +14415,9 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
     compute();
     for (auto & w : workers) { w.join(); }
     workers.clear();
+    if (!valid) {
+        throw std::runtime_error("quantized data validation failed");
+    }
    return new_size;
 }
 
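For reference, a minimal standalone sketch of the quantize-then-validate pattern this patch introduces. It is not part of the diff: it assumes a ggml build that exports `ggml_validate_row_data`, and the type, row length, and buffer contents are illustrative only.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

#include "ggml.h"

int main() {
    // illustrative shape: n_per_row must be a multiple of the type's block size (32 for Q4_0)
    const int64_t n_per_row = 256;
    const int64_t nrows     = 4;
    std::vector<float> f32_data(nrows * n_per_row, 0.5f);

    const enum ggml_type type = GGML_TYPE_Q4_0;
    std::vector<uint8_t> quantized(nrows * ggml_row_size(type, n_per_row));

    // quantize all rows as a single chunk starting at element offset 0, without an imatrix
    const size_t new_size = ggml_quantize_chunk(type, f32_data.data(), quantized.data(),
                                                0, nrows, n_per_row, /*imatrix=*/nullptr);

    // same check the patch adds: reject output whose blocks hold NaN/inf or otherwise malformed values
    if (!ggml_validate_row_data(type, quantized.data(), new_size)) {
        std::fprintf(stderr, "quantized data validation failed\n");
        return 1;
    }
    std::printf("quantized %zu bytes, validation passed\n", new_size);
    return 0;
}
```

In the multithreaded path above, the same check runs per chunk: each worker validates only the rows it just wrote (at byte offset `first_row * row_size`) and flips the shared `valid` flag under the mutex, so a single bad chunk makes the function throw after all workers have joined.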