Skip to content

Commit 1a9c263

Browse files
committed
rwkv7: do not quantize small yet 2D lora weights
Signed-off-by: Molly Sophia <[email protected]>
1 parent 41a80df commit 1a9c263

File tree

1 file changed: +10 -1 lines changed

src/llama-quant.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -756,10 +756,19 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
756 756   // NOTE: can't use LLM_TN here because the layer number is not known
757 757   quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
758 758
759     - // do not quantize RWKV's time_mix_first tensors
    759 + // do not quantize RWKV's small yet 2D weights
760 760   quantize &= name.find("time_mix_first.weight") == std::string::npos;
    761 + quantize &= name.find("time_mix_w0.weight") == std::string::npos;
761 762   quantize &= name.find("time_mix_w1.weight") == std::string::npos;
762 763   quantize &= name.find("time_mix_w2.weight") == std::string::npos;
    764 + quantize &= name.find("time_mix_v0.weight") == std::string::npos;
    765 + quantize &= name.find("time_mix_v1.weight") == std::string::npos;
    766 + quantize &= name.find("time_mix_v2.weight") == std::string::npos;
    767 + quantize &= name.find("time_mix_a0.weight") == std::string::npos;
    768 + quantize &= name.find("time_mix_a1.weight") == std::string::npos;
    769 + quantize &= name.find("time_mix_a2.weight") == std::string::npos;
    770 + quantize &= name.find("time_mix_g1.weight") == std::string::npos;
    771 + quantize &= name.find("time_mix_g2.weight") == std::string::npos;
763 772   quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
764 773   quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
765 774   quantize &= name.find("time_mix_lerp_fused.weight") == std::string::npos;

0 commit comments

Comments
 (0)