File tree Expand file tree Collapse file tree 2 files changed +7
-4
lines changed Expand file tree Collapse file tree 2 files changed +7
-4
lines changed Original file line number Diff line number Diff line change @@ -192,11 +192,8 @@ def quantize(
192
192
elif qmode == "int4" :
193
193
model_int4 = Int8DynActInt4WeightQuantHandler (
194
194
model , activation_precision = torch_dtype
195
- )
196
- model_int4_state_dict = model_int4 .create_quantized_state_dict ()
197
- model_int4 = model_int4 .convert_for_runtime ()
195
+ ).quantized_model ()
198
196
print ("quantized model:" , model_int4 )
199
- model_int4 .load_state_dict (model_int4_state_dict )
200
197
return model_int4
201
198
else :
202
199
raise Exception (f"Unrecognized quantize mode: { qmode } " )
Original file line number Diff line number Diff line change @@ -1002,6 +1002,12 @@ def convert_for_runtime(self):
1002
1002
)
1003
1003
return self .mod
1004
1004
1005
+ def quantized_model (self ) -> nn .Module :
1006
+ model_updated_state_dict = self .create_quantized_state_dict ()
1007
+ self .convert_for_runtime ()
1008
+ self .mod .load_state_dict (model_updated_state_dict )
1009
+ return self .mod
1010
+
1005
1011
1006
1012
class Int8DynActInt4WeightLinear (torch .nn .Module ):
1007
1013
__constants__ = ["in_features" , "out_features" ]
You can’t perform that action at this time.
0 commit comments