File tree Expand file tree Collapse file tree 1 file changed +8
-6
lines changed Expand file tree Collapse file tree 1 file changed +8
-6
lines changed Original file line number Diff line number Diff line change @@ -1256,14 +1256,16 @@ def create_quantized_state(self):
1256
1256
).create_quantized_state_dict ()
1257
1257
1258
1258
def convert_for_runtime (self ):
1259
- pass
1260
-
1261
-
1262
- def quantized_model (self ) -> nn .Module :
1263
- self .create_quantized_state ()
1259
+ # ALSO: all code must work for CPU, CUDA, MPS
1264
1260
return WeightOnlyInt4GPTQQuantHandler (
1265
1261
self .model_ , self .device , tokenizer = None , groupsize = self .groupsize
1266
- ).quantized_model ()
1262
+ ).convert_for_runtime ()
1263
+
1264
+ def quantized_model (self ) -> nn .Module :
1265
+ model_updated_state_dict = self .create_quantized_state_dict ()
1266
+ self .convert_for_runtime ()
1267
+ self .model_ .load_state_dict (model_updated_state_dict )
1268
+ return self .model_
1267
1269
1268
1270
1269
1271
##########################################################################
You can’t perform that action at this time.
0 commit comments