Skip to content

Commit 0dc1214

Browse files
committed
iq1_m: minor
1 parent 15d06d0 commit 0dc1214

File tree

3 files changed

+2
-12
lines changed

3 files changed

+2
-12
lines changed

examples/quantize/quantize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
2626
{ "IQ2_S", LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization", },
2727
{ "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", },
2828
{ "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
29-
{ "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.81 bpw quantization", },
29+
{ "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
3030
{ "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", },
3131
{ "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", },
3232
{ "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", },

ggml-quants.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12177,16 +12177,6 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
1217712177
sc[1] |= ((s.u16 & 0x00f0) << 8);
1217812178
sc[2] |= ((s.u16 & 0x0f00) << 4);
1217912179
sc[3] |= ((s.u16 & 0xf000) << 0);
12180-
//y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.125f is another fudge factor. Don't ask me why it is needed.
12181-
//for (int ib = 0; ib < QK_K/block_size; ib += 2) {
12182-
// int l1 = nearest_int(0.5f*(id*scales[ib+0]-1));
12183-
// l1 = MAX(0, MIN(7, l1));
12184-
// int l2 = nearest_int(0.5f*(id*scales[ib+1]-1));
12185-
// l2 = MAX(0, MIN(7, l2));
12186-
// y[ibl].scales[ib/2] = l1 | (l2 << 4);
12187-
// y[ibl].qh[ib+0] |= masks[shifts[ib+0]];
12188-
// y[ibl].qh[ib+1] |= masks[shifts[ib+1]];
12189-
//}
1219012180
}
1219112181
}
1219212182

llama.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3414,7 +3414,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
34143414
case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
34153415
case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
34163416
case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw";
3417-
case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.8125 bpw";
3417+
case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.75 bpw";
34183418
case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
34193419
case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
34203420
case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";

0 commit comments

Comments (0)