
Commit 458582a

mikekgfb authored and malfet committed
Merge lowering of gguf files with ET tests into pull.yml (#417)
* Revert "Revert "Embedding quantization per backend (#402)" (#411)"
  This reverts commit 8b35acd.
* merge GGUF tests into pull.yml
1 parent ba0d5d8 commit 458582a


.github/workflows/pull.yml

Lines changed: 28 additions & 0 deletions
@@ -279,6 +279,20 @@ jobs:
     python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
     cat ./output_et

+    echo "******************************************"
+    echo "**** Emb 4bit: channel-wise quantized ****"
+    echo "******************************************"
+    python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+    python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
+    cat ./output_et
+
+    echo "******************************************"
+    echo "****** Emb 4bit: group-wise quantized ****"
+    echo "******************************************"
+    python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+    python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
+    cat ./output_et
+
     echo "******************************************"
     echo "******* INT8 channel-wise quantized ******"
     echo "******************************************"
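The two new blocks exercise the ExecuTorch (ET) embedding-quantization path through the --quant JSON flag: "groupsize": 0 requests channel-wise quantization, while a nonzero groupsize (8 here) requests group-wise quantization. Note the banners say "Emb 4bit" although the config as committed requests "bitwidth": 8. A minimal sketch for reproducing these steps locally, assuming MODEL_PATH, MODEL_DIR, and MODEL_NAME are set the same way as elsewhere in this workflow:

    # Channel-wise (groupsize 0) embedding quantization, lowered to a .pte
    python export.py --quant '{"embedding": {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
    python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et

    # Group-wise variant: only the quant JSON changes (groupsize 8)
    python export.py --quant '{"embedding": {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
    python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et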
@@ -300,6 +314,20 @@ jobs:
     python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
     # cat ./output_et

+    echo "******************************************"
+    echo "******** INT4 group-wise quantized *******"
+    echo "******************************************"
+    # python export.py --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+    # python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
+    # cat ./output_et
+
+    echo "******************************************"
+    echo "******** HQQ group-wise quantized *******"
+    echo "******************************************"
+    # python export.py --quant '{"linear:hqq" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+    # python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
+    # cat ./output_et
+
     echo "tests complete"
     echo "******************************************"
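The GGUF-path additions above land commented out, so the workflow records the intended INT4 and HQQ lowering commands without running them yet. A sketch of what those steps would look like once enabled, assuming TOKENIZER_PATH and GGUF_PATH are set as in the surrounding job:

    # INT4 group-wise linear quantization, loading the model from a GGUF file
    python export.py --quant '{"linear:int4": {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
    python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et

    # HQQ group-wise variant: only the quant scheme key changes
    python export.py --quant '{"linear:hqq": {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
    python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et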
