Skip to content

Commit 8e60a72

Browse files
author
Michael Gschwind
committed
undo hqq breaking change
1 parent 1967270 commit 8e60a72

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

quantize.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1256,14 +1256,16 @@ def create_quantized_state(self):
1256 1256
).create_quantized_state_dict()
1257 1257

1258 1258
def convert_for_runtime(self):
1259-
pass
1260-
1261-
1262-
def quantized_model(self) -> nn.Module:
1263-
self.create_quantized_state()
1259+
# ALSO: all code must work for CPU, CUDA, MPS
1264 1260
return WeightOnlyInt4GPTQQuantHandler(
1265 1261
self.model_, self.device, tokenizer=None, groupsize=self.groupsize
1266-
).quantized_model()
1262+
).convert_for_runtime()
1263+
1264+
def quantized_model(self) -> nn.Module:
1265+
model_updated_state_dict = self.create_quantized_state_dict()
1266+
self.convert_for_runtime()
1267+
self.model_.load_state_dict(model_updated_state_dict)
1268+
return self.model_
1267 1269

1268 1270

1269 1271
##########################################################################

0 commit comments

Comments
 (0)