Skip to content

Commit 0dc1214

Browse files
committed
iq1_m: minor
1 parent 15d06d0 commit 0dc1214

File tree

3 files changed

+2
-12
lines changed

3 files changed

+2
-12
lines changed

examples/quantize/quantize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
2626
{ "IQ2_S", LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization", },
2727
{ "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", },
2828
{ "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
29-
{ "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.81 bpw quantization", },
29+
{ "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
3030
{ "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", },
3131
{ "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", },
3232
{ "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", },

ggml-quants.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12177,16 +12177,6 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
1217712177
sc[1] |= ((s.u16 & 0x00f0) << 8);
1217812178
sc[2] |= ((s.u16 & 0x0f00) << 4);
1217912179
sc[3] |= ((s.u16 & 0xf000) << 0);
12180-
//y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.125f is another fudge factor. Don't ask me why it is needed.
12181-
//for (int ib = 0; ib < QK_K/block_size; ib += 2) {
12182-
// int l1 = nearest_int(0.5f*(id*scales[ib+0]-1));
12183-
// l1 = MAX(0, MIN(7, l1));
12184-
// int l2 = nearest_int(0.5f*(id*scales[ib+1]-1));
12185-
// l2 = MAX(0, MIN(7, l2));
12186-
// y[ibl].scales[ib/2] = l1 | (l2 << 4);
12187-
// y[ibl].qh[ib+0] |= masks[shifts[ib+0]];
12188-
// y[ibl].qh[ib+1] |= masks[shifts[ib+1]];
12189-
//}
1219012180
}
1219112181
}
1219212182

llama.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3414,7 +3414,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
34143414
case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
34153415
case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
34163416
case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw";
3417-
case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.8125 bpw";
3417+
case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.75 bpw";
34183418
case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
34193419
case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
34203420
case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";

0 commit comments

Comments (0)