@@ -279,6 +279,20 @@ jobs:
279
279
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
280
280
cat ./output_et
281
281
282
+ echo "******************************************"
283
+ echo "**** Emb 4bit: channel-wise quantized ****"
284
+ echo "******************************************"
285
+ # Export with 4-bit embedding quantization so this step actually tests what the
+ # "Emb 4bit: channel-wise" banner announces (it previously passed bitwidth 8).
+ # groupsize 0 is the channel-wise configuration, per the banner pairing.
+ python export.py --quant '{"embedding" : {"bitwidth": 4, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
286
+ python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
287
+ cat ./output_et
288
+
289
+ echo "******************************************"
290
+ echo "****** Emb 4bit: group-wise quantized ****"
291
+ echo "******************************************"
292
+ # Export with 4-bit embedding quantization so this step actually tests what the
+ # "Emb 4bit: group-wise" banner announces (it previously passed bitwidth 8).
+ # groupsize 8 selects the group-wise configuration, per the banner pairing.
+ python export.py --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
293
+ python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
294
+ cat ./output_et
295
+
282
296
echo "******************************************"
283
297
echo "******* INT8 channel-wise quantized ******"
284
298
echo "******************************************"
@@ -300,6 +314,20 @@ jobs:
300
314
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
301
315
# cat ./output_et
302
316
317
+ echo "******************************************"
318
+ echo "******** INT4 group-wise quantized *******"  # NOTE(review): the export/generate/cat commands for this section are commented out below, so only the banner is printed
319
+ echo "******************************************"
320
+ # python export.py --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
321
+ # python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
322
+ # cat ./output_et
323
+
324
+ echo "******************************************"
325
+ # Banner title padded to 42 characters so it lines up with the surrounding
+ # asterisk rows. NOTE(review): the HQQ export/generate/cat commands that follow
+ # are commented out, so this section currently prints the banner only.
+ echo "******** HQQ group-wise quantized ********"
326
+ echo "******************************************"
327
+ # python export.py --quant '{"linear:hqq" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
328
+ # python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
329
+ # cat ./output_et
330
+
303
331
echo "tests complete"
304
332
echo "******************************************"
305
333
0 commit comments