@@ -301,6 +301,130 @@ float16_t dequantFuncQ6_K(const in decodeBufQ6_K bl, const in uint blockCoords[2
301
301
return ret;
302
302
}
303
303
304
#if defined(DATA_A_IQ2_XXS)
// View of one IQ2_XXS block through its byte-addressed struct.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ2_XXS {
    block_iq2_xxs block;
};

// View of the same block through its 16-bit packed struct.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ2_XXS_packed16 {
    block_iq2_xxs_packed16 block;
};

// Dequantize a single element of an IQ2_XXS block.
//   bl           - reference to the block being decoded
//   blockCoords  - not read by this function
//   coordInBlock - coordInBlock[1] is the element index inside the block
// Returns the element as float16_t.
float16_t dequantFuncIQ2_XXS(const in decodeBufIQ2_XXS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    const decodeBufIQ2_XXS_packed16 packed = decodeBufIQ2_XXS_packed16(bl);
    const float16_t d = bl.block.d;
    const uint elem = coordInBlock[1];

    const uint group32 = (elem >> 5) & 7u; // 0..7: which run of 32 elements
    const uint group8  = (elem >> 3) & 3u; // 0..3: which run of 8 inside that group

    // Every 32-element group uses 8 bytes of qs: the first four are grid
    // indices (one per 8 elements), the last four form a 32-bit word.
    const uint8_t gridIndex = bl.block.qs[8 * group32 + group8];
    const uint wordBase = 4 * group32 + 2;
    const uint signscale = pack32(u16vec2(packed.block.qs[wordBase], packed.block.qs[wordBase + 1]));

    // The top 4 bits of the word are the group scale.
    const float16_t dscale = d * 0.25hf * (0.5hf + float16_t(signscale >> 28));

    // Seven sign bits per 8 elements. The OR puts the low bit of the popcount
    // (the parity) into bit 7; higher popcount bits land above bit 7 and are
    // never tested because the mask below is at most 1 << 7.
    const uint sign7 = bitfieldExtract(signscale, 7 * int(group8), 7);
    const uint signBits = sign7 | uint(bitCount(sign7) << 7);

    // Each grid entry is two 32-bit words of four bytes each.
    const uint8_t g = unpack8(iq2xxs_grid[gridIndex][(elem >> 2) & 1u])[elem & 3u];

    const bool negative = (signBits & (1u << (elem & 7u))) != 0u;
    const float16_t signFactor = negative ? -1.0hf : 1.0hf;

    return dscale * float16_t(g) * signFactor;
}
#endif
337
+
338
#if defined(DATA_A_IQ2_XS)
// View of one IQ2_XS block.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ2_XS {
    block_iq2_xs block;
};

// Dequantize a single element of an IQ2_XS block.
//   bl           - reference to the block being decoded
//   blockCoords  - not read by this function
//   coordInBlock - coordInBlock[1] is the element index inside the block
// Returns the element as float16_t.
float16_t dequantFuncIQ2_XS(const in decodeBufIQ2_XS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    const float16_t d = bl.block.d;
    const uint elem = coordInBlock[1];

    const uint scaleIdx    = (elem >> 5) & 7u;  // 0..7: one scale byte per 32 elements
    const uint nibbleShift = (elem & 0x10) >> 2; // 0 or 4: low/high nibble per 16 elements
    const uint wordIdx     = (elem >> 3) & 31u; // 0..31: one 16-bit word per 8 elements

    const uint16_t qs = bl.block.qs[wordIdx];

    const uint nibble = uint((bl.block.scales[scaleIdx] >> nibbleShift) & 0xF);
    const float16_t dscale = d * 0.25hf * (0.5hf + float16_t(nibble));

    // Upper 7 bits of the word are sign bits. The OR puts the low bit of the
    // popcount (the parity) into bit 7; higher popcount bits are never tested.
    const uint sign7 = uint(qs >> 9);
    const uint signBits = sign7 | uint(bitCount(sign7) << 7);

    // Lower 9 bits select the grid entry (two 32-bit words of four bytes each).
    const uint8_t g = unpack8(iq2xs_grid[qs & 0x1FF][(elem >> 2) & 1u])[elem & 3u];

    const bool negative = (signBits & (1u << (elem & 7u))) != 0u;
    const float16_t signFactor = negative ? -1.0hf : 1.0hf;

    return dscale * float16_t(g) * signFactor;
}
#endif
363
+
364
#if defined(DATA_A_IQ3_XXS)
// View of one IQ3_XXS block through its byte-addressed struct.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3_XXS {
    block_iq3_xxs block;
};

// View of the same block through its 16-bit packed struct.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3_XXS_packed16 {
    block_iq3_xxs_packed16 block;
};

// Dequantize a single element of an IQ3_XXS block.
//   bl           - reference to the block being decoded
//   blockCoords  - not read by this function
//   coordInBlock - coordInBlock[1] is the element index inside the block
// Returns the element as float16_t.
float16_t dequantFuncIQ3_XXS(const in decodeBufIQ3_XXS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    decodeBufIQ3_XXS_packed16 bl16 = decodeBufIQ3_XXS_packed16(bl);
    const float16_t d = bl.block.d;
    const uint idx = coordInBlock[1];

    const uint ib32 = (idx & 0xE0) >> 5; // 0..7:  group of 32 elements
    const uint ib8  = (idx & 0x18) >> 3; // 0..3:  group of 8 elements inside the 32-group
    // One grid entry supplies four values (selected by idx & 3 below), so the
    // grid index must advance every 4 elements.
    const uint ib4  = (idx & 0xFC) >> 2; // 0..63: group of 4 elements
    // Sign/scale words are addressed through the packed16 view, one 32-bit
    // word per 32 elements.
    // NOTE(review): with QUANT_K == 256 this places them right after the 64
    // grid-index bytes - confirm against the block struct definition.
    const uint is16 = QUANT_K / 8 + 2 * ib32;

    const uint8_t qs = bl.block.qs[ib4];
    const uint signscale = pack32(u16vec2(bl16.block.qs[is16], bl16.block.qs[is16+1]));

    // The top 4 bits of the word are the group scale.
    const float16_t dscale = d * 0.5hf * (0.5hf + float16_t(signscale >> 28));

    // Each 7-bit sign field covers 8 elements (the mask below is
    // 1 << (idx & 7)), so the field is chosen by the 8-element group index.
    uint sign = bitfieldExtract(signscale, 7 * int(ib8), 7);
    // Bit 7 receives the low bit of the popcount (parity); higher popcount
    // bits are never tested.
    sign |= bitCount(sign) << 7;

    const uint8_t g = unpack8(iq3xxs_grid[qs])[idx & 3];

    float16_t ret = dscale * float16_t(g) * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
    return ret;
}
#endif
396
+
397
#if defined(DATA_A_IQ3_S)
// View of one IQ3_S block.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3_S {
    block_iq3_s block;
};

// Dequantize a single element of an IQ3_S block.
//   bl           - reference to the block being decoded
//   blockCoords  - not read by this function
//   coordInBlock - coordInBlock[1] is the element index inside the block
// Returns the element as float16_t.
//
// All bytes loaded from the block are widened to uint before any shift or
// mask. The qh byte is shifted left by up to 8 and then masked with 0x100;
// doing that shift on an 8-bit operand could drop the very bit being
// extracted if the shift result keeps the left operand's width.
float16_t dequantFuncIQ3_S(const in decodeBufIQ3_S bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    const float16_t d = bl.block.d;
    const uint idx = coordInBlock[1];

    const uint iqs = (idx & 0xFC) >> 2;    // 0..63: one grid-index byte per 4 elements
    const uint iqh = (idx & 0xE0) >> 5;    // 0..7:  one high-bit byte per 32 elements
    const uint qhbit = iqs & 7;            // 0..7:  bit of that byte belonging to iqs
    const uint isgn = (idx & 0xF8) >> 3;   // 0..31: one sign byte per 8 elements
    const uint is = (idx & 0xC0) >> 6;     // 0..3:  one scale byte per 64 elements

    // Low nibble for the first 32 elements of the 64-group, high nibble for the rest.
    const uint scaleByte = uint(bl.block.scales[is]);
    const uint scale = (scaleByte >> ((idx & 0x20) >> 3)) & 0xFu;
    const float16_t dscale = d * (1.0hf + float16_t(2u * scale));

    const uint qs = uint(bl.block.qs[iqs]);
    // Move bit `qhbit` of the high-bit byte into bit 8 of the grid index.
    const uint qhByte = uint(bl.block.qh[iqh]);
    const uint qh = (qhByte << (8u - qhbit)) & 0x100u;
    const uint sign = uint(bl.block.signs[isgn]);

    const uint g = uint(unpack8(iq3s_grid[qs | qh])[idx & 3]);
    float16_t ret = dscale * float16_t(g) * ((sign & (1u << (idx & 7u))) != 0u ? -1.0hf : 1.0hf);

    return ret;
}
#endif
426
+
427
+
304
428
#if defined(DATA_A_IQ4_NL)
305
429
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ4_NL {
306
430
block_iq4_nl block;
@@ -340,6 +464,14 @@ float16_t dequantFuncIQ4_NL(const in decodeBufIQ4_NL bl, const in uint blockCoor
340
464
#define dequantFuncA dequantFuncQ5_K
341
465
#elif defined(DATA_A_Q6_K)
342
466
#define dequantFuncA dequantFuncQ6_K
467
+ #elif defined(DATA_A_IQ2_XXS)
468
+ #define dequantFuncA dequantFuncIQ2_XXS
469
+ #elif defined(DATA_A_IQ2_XS)
470
+ #define dequantFuncA dequantFuncIQ2_XS
471
+ #elif defined(DATA_A_IQ3_XXS)
472
+ #define dequantFuncA dequantFuncIQ3_XXS
473
+ #elif defined(DATA_A_IQ3_S)
474
+ #define dequantFuncA dequantFuncIQ3_S
343
475
#elif defined(DATA_A_IQ4_NL)
344
476
#define dequantFuncA dequantFuncIQ4_NL
345
477
#endif
0 commit comments