Skip to content

Commit 017e699

Browse files
authored
add basic tensor data validation function (ggml-org#6884)
* Add a basic tensor data validation function. * Add a `--check-tensors` command line argument. Tensor validation is disabled by default and can be enabled by adding `--check-tensors` to the command line arguments; quantize always validates tensors.
1 parent e2764cd commit 017e699

File tree

6 files changed

+371
-19
lines changed

6 files changed

+371
-19
lines changed

common/common.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,6 +1089,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
10891089
params.n_print = std::stoi(argv[i]);
10901090
return true;
10911091
}
1092+
if (arg == "--check-tensors") {
1093+
params.check_tensors = true;
1094+
return true;
1095+
}
10921096
if (arg == "--ppl-output-type") {
10931097
if (++i >= argc) {
10941098
invalid_param = true;
@@ -1554,6 +1558,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
15541558
printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
15551559
printf(" -ptc N, --print-token-count N\n");
15561560
printf(" print token count every N tokens (default: %d)\n", params.n_print);
1561+
printf(" --check-tensors check model tensor data for invalid values\n");
15571562
printf("\n");
15581563
#ifndef LOG_DISABLE_LOGS
15591564
log_print_usage();
@@ -1774,6 +1779,7 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
17741779
mparams.tensor_split = params.tensor_split;
17751780
mparams.use_mmap = params.use_mmap;
17761781
mparams.use_mlock = params.use_mlock;
1782+
mparams.check_tensors = params.check_tensors;
17771783
if (params.kv_overrides.empty()) {
17781784
mparams.kv_overrides = NULL;
17791785
} else {

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ struct gpt_params {
161161
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
162162
bool no_kv_offload = false; // disable KV offloading
163163
bool warmup = true; // warmup run
164+
bool check_tensors = false; // validate tensor data
164165

165166
std::string cache_type_k = "f16"; // KV cache data type for the K
166167
std::string cache_type_v = "f16"; // KV cache data type for the V

ggml-quants.c

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12383,3 +12383,287 @@ void quantize_row_iq2_s(const float * restrict x, void * restrict vy, int64_t k)
1238312383
block_iq2_s * restrict y = vy;
1238412384
quantize_row_iq2_s_reference(x, y, k);
1238512385
}
12386+
12387+
// Check that a float value is finite (neither inf nor nan).
// On failure, reports the offending block index on stderr and returns false.
static bool validate_float(float f, size_t i) {
    if (!isinf(f) && !isnan(f)) {
        return true;
    }

    if (isinf(f)) {
        fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
    } else {
        fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
    }

    return false;
}
12400+
12401+
// True when the fp16 bit pattern encodes +/-inf: exponent bits all set
// (0x7c00) with a zero mantissa, i.e. the magnitude bits equal 0x7c00 exactly.
static bool isinf_fp16(ggml_fp16_t f) {
    return (f & 0x7fff) == 0x7c00;
}
12404+
12405+
// True when the fp16 bit pattern encodes a NaN: exponent bits all set with a
// non-zero mantissa, i.e. the sign-stripped bits exceed the inf pattern 0x7c00.
static bool isnan_fp16(ggml_fp16_t f) {
    return (f & 0x7fff) > 0x7c00;
}
12408+
12409+
// Check that an fp16 bit pattern encodes a finite value (neither inf nor nan).
// On failure, reports the offending block index on stderr and returns false.
static bool validate_fp16(ggml_fp16_t f, size_t i) {
    if (!isinf_fp16(f) && !isnan_fp16(f)) {
        return true;
    }

    if (isinf_fp16(f)) {
        fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
    } else {
        fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
    }

    return false;
}
12422+
12423+
// Validate the fp16 scale field `d` of each of the `nb` blocks of the given
// block `type` at `data`; returns false from the ENCLOSING function on the
// first non-finite value found.
// NOTE(review): declares a local `q` and is not do{}while(0)-wrapped — must be
// expanded inside its own braced scope (as all call sites below do).
#define VALIDATE_ROW_DATA_D_F16_IMPL(type, data, nb) \
    const type * q = (const type *) (data); \
    for (size_t i = 0; i < (nb); ++i) { \
        if (!validate_fp16(q[i].d, i)) { \
            return false; \
        } \
    }

// Same as VALIDATE_ROW_DATA_D_F16_IMPL, but validates two fp16 fields per
// block, named by the `d` and `m` arguments (e.g. scale and min).
#define VALIDATE_ROW_DATA_DM_F16_IMPL(type, data, nb, d, m) \
    const type * q = (const type *) (data); \
    for (size_t i = 0; i < (nb); ++i) { \
        if (!validate_fp16(q[i].d, i) || !validate_fp16(q[i].m, i)) { \
            return false; \
        } \
    }
12438+
12439+
// Validate `nbytes` of tensor data of the given type: returns true when no
// inf/nan is found, false otherwise (reporting the offending block on stderr).
// For F16/F32/F64 every element is checked; for quantized types only the
// per-block fp16/fp32 scale fields are checked, not the packed quants.
// `nbytes` must be a whole number of blocks of the type.
bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes) {
    if (type < 0 || type >= GGML_TYPE_COUNT) {
        fprintf(stderr, "%s: invalid type %d\n", __func__, type);
        return false;
    }

    if (nbytes % ggml_type_size(type) != 0) {
        fprintf(stderr, "%s: invalid size %zu for type %d\n", __func__, nbytes, type);
        return false;
    }

    // number of elements (float types) or blocks (quantized types)
    const size_t nb = nbytes/ggml_type_size(type);

    switch (type) {
        case GGML_TYPE_F16:
            {
                const ggml_fp16_t * f = (const ggml_fp16_t *) data;
                size_t i = 0;
#if defined(__AVX2__)
                // SIMD pre-scan, 16 fp16 values at a time: flag any lane whose
                // exponent field is all ones (inf or nan); the scalar re-scan
                // below pinpoints and reports the bad value.
                for (; i + 15 < nb; i += 16) {
                    __m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
                    __m256i vexp = _mm256_and_si256(v, _mm256_set1_epi16(0x7c00));
                    __m256i cmp = _mm256_cmpeq_epi16(vexp, _mm256_set1_epi16(0x7c00));
                    int mask = _mm256_movemask_epi8(cmp);
                    if (mask) {
                        for (size_t j = 0; j < 16; ++j) {
                            if (!validate_fp16(f[i + j], i + j)) {
                                return false;
                            }
                        }
                        // a set mask bit guarantees the re-scan finds an inf/nan
                        GGML_UNREACHABLE();
                    }
                }
#elif defined(__ARM_NEON)
                // NEON pre-scan, 8 fp16 values at a time; vshrn_n_u16 narrows
                // the per-lane compare result into a single 64-bit mask.
                for (; i + 7 < nb; i += 8) {
                    uint16x8_t v = vld1q_u16(f + i);
                    uint16x8_t vexp = vandq_u16(v, vdupq_n_u16(0x7c00));
                    uint16x8_t cmp = vceqq_u16(vexp, vdupq_n_u16(0x7c00));
                    uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(cmp, 4)), 0);
                    if (mask) {
                        for (size_t j = 0; j < 8; ++j) {
                            if (!validate_fp16(f[i + j], i + j)) {
                                return false;
                            }
                        }
                        GGML_UNREACHABLE();
                    }
                }
#endif
                // scalar tail (and full scan when no SIMD path is compiled in)
                for (; i < nb; ++i) {
                    if (!validate_fp16(f[i], i)) {
                        return false;
                    }
                }
            } break;
        case GGML_TYPE_F32:
            {
                const float * f = (const float *) data;
                size_t i = 0;
#if defined(__AVX2__)
                // same exponent-field trick as F16, 8 fp32 values at a time
                for (; i + 7 < nb; i += 8) {
                    __m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
                    __m256i vexp = _mm256_and_si256(v, _mm256_set1_epi32(0x7f800000));
                    __m256i cmp = _mm256_cmpeq_epi32(vexp, _mm256_set1_epi32(0x7f800000));
                    int mask = _mm256_movemask_epi8(cmp);
                    if (mask) {
                        for (size_t j = 0; j < 8; ++j) {
                            if (!validate_float(f[i + j], i + j)) {
                                return false;
                            }
                        }
                        GGML_UNREACHABLE();
                    }
                }
#elif defined(__ARM_NEON)
                for (; i + 3 < nb; i += 4) {
                    uint32x4_t v = vld1q_u32((const uint32_t *)f + i);
                    uint32x4_t vexp = vandq_u32(v, vdupq_n_u32(0x7f800000));
                    uint32x4_t cmp = vceqq_u32(vexp, vdupq_n_u32(0x7f800000));
                    uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(vshrn_n_u32(cmp, 8)), 0);
                    if (mask) {
                        for (size_t j = 0; j < 4; ++j) {
                            if (!validate_float(f[i + j], i + j)) {
                                return false;
                            }
                        }
                        GGML_UNREACHABLE();
                    }
                }
#endif
                // scalar tail (and full scan when no SIMD path is compiled in)
                for (; i < nb; ++i) {
                    if (!validate_float(f[i], i)) {
                        return false;
                    }
                }
            } break;
        case GGML_TYPE_F64:
            {
                // no SIMD path for f64 — scalar scan only
                const double * f = (const double *) data;
                for (size_t i = 0; i < nb; ++i) {
                    if (!validate_float(f[i], i)) {
                        return false;
                    }
                }
            } break;
        // quantized types: validate the per-block fp16 scale (d) and, where
        // present, the second fp16 field (m / dmin)
        case GGML_TYPE_Q4_0:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb);
            } break;
        case GGML_TYPE_Q4_1:
            {
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_1, data, nb, d, m);
            } break;
        case GGML_TYPE_Q5_0:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_0, data, nb);
            } break;
        case GGML_TYPE_Q5_1:
            {
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_1, data, nb, d, m);
            } break;
        case GGML_TYPE_Q8_0:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q8_0, data, nb);
            } break;
        case GGML_TYPE_Q2_K:
            {
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q2_K, data, nb, d, dmin);
            } break;
        case GGML_TYPE_Q3_K:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q3_K, data, nb);
            } break;
        case GGML_TYPE_Q4_K:
            {
                // with GGML_QKK_64 the block stores two scales in d[], not d/dmin
#ifdef GGML_QKK_64
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d[0], d[1]);
#else
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d, dmin);
#endif
            } break;
        case GGML_TYPE_Q5_K:
            {
#ifdef GGML_QKK_64
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_K, data, nb);
#else
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_K, data, nb, d, dmin);
#endif
            } break;
        case GGML_TYPE_Q6_K:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q6_K, data, nb);
            } break;
        case GGML_TYPE_Q8_K:
            {
                // q8_K stores its scale as a float, not fp16
                const block_q8_K * q = (const block_q8_K *) data;
                for (size_t i = 0; i < nb; ++i) {
                    if (!validate_float(q[i].d, i)) {
                        return false;
                    }
                }
            } break;
        case GGML_TYPE_IQ1_S:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq1_s, data, nb);
            } break;
        case GGML_TYPE_IQ1_M:
            {
                const block_iq1_m * q = (const block_iq1_m *) data;
                for (size_t i = 0; i < nb; ++i) {
#if QK_K == 64
                    if (!validate_fp16(q[i].d, i)) {
                        return false;
                    }
#else
                    // the fp16 scale is scattered across the top nibbles of the
                    // four 16-bit scale words — reassemble it before checking
                    iq1m_scale_t scale;
                    const uint16_t * sc = (const uint16_t *)q[i].scales;
                    scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
                    if (!validate_fp16(scale.f16, i)) {
                        return false;
                    }
#endif
                }
            } break;
        case GGML_TYPE_IQ2_XXS:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xxs, data, nb);
            } break;
        case GGML_TYPE_IQ2_XS:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xs, data, nb);
            } break;
        case GGML_TYPE_IQ2_S:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_s, data, nb);
            } break;
        case GGML_TYPE_IQ3_XXS:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_xxs, data, nb);
            } break;

        case GGML_TYPE_IQ3_S:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_s, data, nb);
            } break;
        case GGML_TYPE_IQ4_XS:
#if QK_K != 64
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_xs, data, nb);
            } break;
#endif
        // with QK_K == 64, iq4_xs is iq4_nl (deliberate fall-through above)
        case GGML_TYPE_IQ4_NL:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb);
            } break;
        case GGML_TYPE_I8:
        case GGML_TYPE_I16:
        case GGML_TYPE_I32:
        case GGML_TYPE_I64:
            // integer types cannot hold inf/nan — nothing to validate
            break;
        default:
            {
                fprintf(stderr, "%s: invalid type %d\n", __func__, type);
                return false;
            }
    }

    return true;
}

ggml.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,8 @@ extern "C" {
762762
// use this to compute the memory overhead of a tensor
763763
GGML_API size_t ggml_tensor_overhead(void);
764764

765+
GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes);
766+
765767
// main
766768

767769
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);

0 commit comments

Comments (0)