
Commit 4fd4fb5

Try something
1 parent a61b9b4 commit 4fd4fb5

File tree

1 file changed: +1 -0 lines changed


quantization/quantize.py

Lines changed: 1 addition & 0 deletions
@@ -63,6 +63,7 @@ def quantize_model(model: nn.Module, device, quantize_options, tokenizer=None):
         # Only use quant API for dtype bf16 and CUDA
         if quantizer == "linear:int4" and precision == torch.bfloat16 and device == "cuda":
             quantize_(model, int4_weight_only(group_size=q_kwargs["groupsize"]))
+            model.to(device=device)
             continue

         try:
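
A minimal sketch of the pattern this commit adds, assuming torchao's quantize_ / int4_weight_only APIs and a bfloat16 model on CUDA. The model, group size, and q_kwargs below are illustrative stand-ins, not values from the repository, and the rationale for the added model.to(device=device) call is not stated in the commit itself.

# Sketch only: assumes torchao is installed and a CUDA device is available.
import torch
import torch.nn as nn
from torchao.quantization import quantize_, int4_weight_only

device = "cuda"
# Illustrative model; the real code quantizes an arbitrary nn.Module.
model = nn.Sequential(nn.Linear(4096, 4096), nn.Linear(4096, 4096))
model = model.to(device=device, dtype=torch.bfloat16)

# int4 weight-only quantization via the torchao quant API; group size is illustrative.
q_kwargs = {"groupsize": 128}
quantize_(model, int4_weight_only(group_size=q_kwargs["groupsize"]))

# The line this commit adds: explicitly place the quantized model on the
# intended device before continuing.
model.to(device=device)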
