File tree Expand file tree Collapse file tree 1 file changed +5
-4
lines changed
examples/models/llama/source_transformation Expand file tree Collapse file tree 1 file changed +5
-4
lines changed Original file line number Diff line number Diff line change @@ -119,10 +119,11 @@ def quantize( # noqa C901
119
119
# Check for required args
120
120
if group_size is None:
121
121
raise Exception("For 8da4w quantization, group size must be specified.")
122
+ from torchao.quantization.quant_api import Int8DynActInt4WeightQuantizer
122
123
123
- from torchao.quantization import int8_dynamic_activation_int4_weight, quantize_
124
-
125
- quantize_(model, int8_dynamic_activation_int4_weight(group_size=group_size))
124
+ model = Int8DynActInt4WeightQuantizer(
125
+     precision=torch_dtype, groupsize=group_size
126
+ ).quantize(model)
126
127
127
128
if verbose:
128
129
print("quantized model:", model)
@@ -662,7 +663,7 @@ def convert_for_runtime(self) -> nn.Module:
662
663
def quantized_model(self) -> nn.Module:
663
664
model_updated_state_dict = self.create_quantized_state_dict(self.packed)
664
665
self.convert_for_runtime()
665
- self.mod.load_state_dict(model_updated_state_dict, assign=True)
666
+ self.mod.load_state_dict(model_updated_state_dict)
666
667
return self.mod
667
668
668
669
You can’t perform that action at this time.
0 commit comments