Undo HQQ breaking change: restore convert_for_runtime and quantized_model

Michael Gschwind · Michael Gschwind · commit 8e60a72074ee · 2024-04-26T15:21:40.000-07:00
diff --git a/quantize.py b/quantize.py
@@ -1256,14 +1256,16 @@ def create_quantized_state(self):
         ).create_quantized_state_dict()
 
     def convert_for_runtime(self):
-        pass
-
-
-    def quantized_model(self) -> nn.Module:
-        self.create_quantized_state()
+        # ALSO: all code must work for CPU, CUDA, MPS
         return WeightOnlyInt4GPTQQuantHandler(
             self.model_, self.device, tokenizer=None, groupsize=self.groupsize
-        ).quantized_model()                                                     
+        ).convert_for_runtime()
+
+    def quantized_model(self) -> nn.Module:
+        model_updated_state_dict = self.create_quantized_state_dict()
+        self.convert_for_runtime()
+        self.model_.load_state_dict(model_updated_state_dict)
+        return self.model_
 
 
 ##########################################################################