Remove duplicated quantize_config vs cli priority logic from quantize.py (#1282)

mikekgfb · web-flow · commit 397967f0ee51 · 2024-10-08T12:06:05.000-07:00
Remove the duplicated priority logic from quantize.py so that the code at cli.py:485 is the single authoritative place that selects between options present in both the quantize settings and the CLI.
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py
@@ -95,12 +95,9 @@ def quantize_model(
                 if not support_tensor_subclass:
                     unwrap_tensor_subclass(model)
                 continue
-            # Use dtype precision specified in user config, else fallback on global precision.
-            if "precision" in quantize_options:
-                dtype = quantize_options["precision"].get("dtype", str(get_precision()))
-                precision = name_to_dtype(dtype, device)
-            else:
-                precision = get_precision()
+            # The global precision is set from the quantize options (when specified) at cli.py:485,
+            # so get_precision() always returns the authoritative precision/dtype in torchchat.
+            precision = get_precision()
 
             try:
                 if quantizer == "linear:a8wxdq":