Skip to content

Commit 397967f

Browse files
authored
Remove duplicated quantize_config vs cli priority logic from quantize.py (#1282)
Remove the duplicated priority logic so that the code at cli.py:485 is the single authoritative place that resolves priority between options present in both the quantize settings and the CLI.
1 parent 9fb7999 commit 397967f

File tree

1 file changed

+3
-6
lines changed

1 file changed

+3
-6
lines changed

torchchat/utils/quantize.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,9 @@ def quantize_model(
9595
if not support_tensor_subclass:
9696
unwrap_tensor_subclass(model)
9797
continue
98-
# Use dtype precision specified in user config, else fallback on global precision.
99-
if "precision" in quantize_options:
100-
dtype = quantize_options["precision"].get("dtype", str(get_precision()))
101-
precision = name_to_dtype(dtype, device)
102-
else:
103-
precision = get_precision()
98+
# When the quantize options specify a precision, cli.py:485 sets it as the global precision,
99+
# so the precision returned by get_precision() is always the authoritative precision/dtype in torchchat
100+
precision = get_precision()
104101

105102
try:
106103
if quantizer == "linear:a8wxdq":

0 commit comments

Comments
 (0)