Commit 84981d9 (parent 8794941)

Commit message: debug
1 file changed: +1, -1 lines

quantization/quantize.py (1 addition, 1 deletion)
@@ -63,7 +63,7 @@ def quantize_model(model: nn.Module, device, quantize_options, tokenizer=None):
         # Only use quant API for dtype bf16 and CUDA
         if quantizer == "linear:int4" and precision == torch.bfloat16 and device == "cuda":
             quantize_(model, int4_weight_only(group_size=q_kwargs["groupsize"]))
-            model.to(device=device)
+            model.to(device="cuda")
             continue

         try:
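
For context, here is a minimal, self-contained sketch of the call pattern the changed line sits in. It assumes torchao's quantize_ / int4_weight_only API; the toy model, the group size of 128, and the tensor shapes are illustrative stand-ins, not taken from the commit.

```python
# Minimal sketch of the int4 weight-only path touched by this commit.
# Assumptions (not from the commit): torchao is installed, a CUDA GPU is
# available, and the toy model below stands in for the real model.
import torch
import torch.nn as nn
from torchao.quantization import quantize_, int4_weight_only

model = nn.Sequential(nn.Linear(4096, 4096, bias=False)).to(
    dtype=torch.bfloat16, device="cuda"
)

# Quantize eligible Linear weights to int4 in place (weight-only, with a
# quantization group size of 128), then move the model to the target device,
# mirroring the two lines around the change.
quantize_(model, int4_weight_only(group_size=128))
model.to(device="cuda")
```

Note that the enclosing `if` already checks `device == "cuda"`, so on this branch passing the literal `"cuda"` is equivalent to passing the `device` variable.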
