Commit 66a9b05

correct iQ4_LR
1 parent 298990a

File tree

1 file changed: +2 −2 lines

src/llama.cpp

Lines changed: 2 additions & 2 deletions
@@ -18805,9 +18805,9 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_LR) {
             if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
                 new_type = qs.i_attention_wq < qs.n_attention_wq/8 ? GGML_TYPE_Q5_K :
-                           difquant_fl_more_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_Q5_K : GGML_TYPE_Q5_K;
+                           difquant_fl_more_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
             }
-            else new_type = difquant_three_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_Q5_K : GGML_TYPE_Q5_K;
+            else new_type = difquant_three_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
         }
         ++qs.i_attention_wq;
     } else if (name.find("ffn_down") != std::string::npos) {
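Note on the change: before this commit, both outcomes of the inner ternaries were GGML_TYPE_Q5_K, so the difquant_* selectors had no effect and every attn_q tensor quantized under LLAMA_FTYPE_MOSTLY_IQ4_LR ended up as Q5_K. The fix makes the false branch fall back to the smaller GGML_TYPE_IQ4_XS. Below is a minimal, self-contained C++ sketch of the corrected selection pattern; the difquant_* helpers and the qs fields are simplified stand-ins introduced only to make the example runnable, not the actual definitions from src/llama.cpp.

```cpp
// Minimal sketch (not the real llama.cpp code) of the corrected type
// selection for attention-Q (wq) tensors under the IQ4_LR ftype.
#include <cstdio>

enum tensor_type { Q5_K, IQ4_XS };

// Stand-in: give the first quarter of the layers the higher-bit type
// (assumption for illustration, not the real difquant_fl_more_tensors).
static bool difquant_fl_more_tensors(int i_layer, int n_layers) {
    return i_layer < n_layers / 4;
}

// Stand-in: give roughly three eighths of the layers the higher-bit type.
static bool difquant_three_eights_tensors(int i_layer, int n_layers) {
    return i_layer < 3 * n_layers / 8;
}

// Mirrors the corrected branch: Q5_K for the first eighth of attn_q tensors
// (or where the difquant selector says so), otherwise fall back to IQ4_XS
// instead of unconditionally returning Q5_K as before the fix.
static tensor_type pick_attn_q_type(int i_wq, int n_wq, bool big_vocab_gqa_or_moe) {
    if (big_vocab_gqa_or_moe) {
        return i_wq < n_wq / 8 ? Q5_K
             : difquant_fl_more_tensors(i_wq, n_wq) ? Q5_K : IQ4_XS;
    }
    return difquant_three_eights_tensors(i_wq, n_wq) ? Q5_K : IQ4_XS;
}

int main() {
    const int n_wq = 32; // e.g. a 32-layer model
    for (int i_wq = 0; i_wq < n_wq; ++i_wq) {
        std::printf("attn_q layer %2d -> %s\n", i_wq,
                    pick_attn_q_type(i_wq, n_wq, /*big_vocab_gqa_or_moe=*/true) == Q5_K
                        ? "Q5_K" : "IQ4_XS");
    }
    return 0;
}
```

With the fix, the large-vocab / GQA / MoE path keeps Q5_K only for the first eighth of attn_q tensors plus whatever difquant_fl_more_tensors selects, and the remaining tensors drop to IQ4_XS (roughly 4.25 bpw versus about 5.5 bpw for Q5_K), which is presumably the intended behaviour of an "IQ4" mix.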
