Skip to content

Commit a61b9b4

Browse files
committed
Fix 4
1 parent b439edf commit a61b9b4

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

quantization/quantize.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ def quantize_model(model: nn.Module, device, quantize_options, tokenizer=None):
61 61
precision = get_precision()
62 62

63 63
# Only use quant API for dtype bf16 and CUDA
64 -
if precision == torch.bfloat16 and device == "cuda":
64 +
if quantizer == "linear:int4" and precision == torch.bfloat16 and device == "cuda":
65 65
quantize_(model, int4_weight_only(group_size=q_kwargs["groupsize"]))
66 66
continue
67 67

0 commit comments

Comments (0)