torchchat/utils/quantize.py (11 additions, 3 deletions)
@@ -45,6 +45,7 @@
     find_multiple,
     get_device_str,
     get_precision,
+    set_precision,
     name_to_dtype,
     state_dict_device,
     use_et_backend,
@@ -115,6 +116,13 @@ def quantize_model(
                 if not support_tensor_subclass:
                     unwrap_tensor_subclass(model)
                 continue
+
+            if quantizer in ["linear:a8wxdq", "embedding:wx"]:
+                # These quantizers require float32 input weights. Note that after quantization,
+                # the weights will no longer be float32, but lowbit integers.
+                if get_precision() != torch.float32:
+                    print(f"Quantizer {quantizer} requires float32 inputs, but received {get_precision()}. Changing dtype to float32. Note that after quantization, the weights will be lowbit integers, not float32.")
+                    set_precision(torch.float32)
 
             # We set global precision from quantize options if it is specified at cli.py:485
             # so the precision returned by get_precision() is always the authoritative precision/dtype in torchchat
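For context, the change above amounts to a small precision guard: before applying a quantizer that only accepts float32 weights, check the global precision and force it to float32. Below is a minimal, self-contained sketch of that pattern. The get_precision/set_precision functions here are simplified stand-ins for torchchat's helpers (imported in the first hunk), and ensure_float32_for is a hypothetical wrapper used only for illustration, not part of the torchchat API.

import torch

# Hypothetical module-level default; torchchat tracks precision globally.
_precision = torch.float16

def get_precision() -> torch.dtype:
    return _precision

def set_precision(dtype: torch.dtype) -> None:
    global _precision
    _precision = dtype

def ensure_float32_for(quantizer: str) -> None:
    # Quantizers that consume float32 input weights (per the diff above).
    # After quantization the stored weights are lowbit integers, so the
    # float32 requirement applies only to the inputs the quantizer reads.
    if quantizer in ["linear:a8wxdq", "embedding:wx"]:
        if get_precision() != torch.float32:
            print(
                f"Quantizer {quantizer} requires float32 inputs, but received "
                f"{get_precision()}. Changing dtype to float32."
            )
            set_precision(torch.float32)

ensure_float32_for("linear:a8wxdq")
assert get_precision() == torch.float32

Because get_precision() is treated as the authoritative dtype throughout torchchat (see the trailing comment in the hunk), mutating it once here keeps every later precision lookup consistent with what the quantizer requires.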