8
8
workflow_dispatch :
9
9
10
10
jobs :
11
- test-cuda :
11
+ test-hqq :
12
12
uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
13
13
with :
14
14
runner : linux.g5.4xlarge.nvidia.gpu
28
28
echo "::group::Download checkpoints"
29
29
# Install requirements
30
30
./install_requirements.sh cuda
31
+ bash scripts/build_native.sh aoti
31
32
pip3 list
32
33
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
34
+ # needed to check for gibberish
35
+ yum install -y aspell aspell-en
33
36
echo "::endgroup::"
34
37
35
38
echo "::group::Download checkpoints"
@@ -42,30 +45,43 @@ jobs:
42
45
43
46
echo "::group::Run inference"
44
47
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
48
+ export TOKENIZER_PATH=checkpoints/stories15M/tokenizer.model
45
49
export MODEL_NAME=stories15M
46
50
export MODEL_DIR=/tmp
47
51
48
- for DTYPE in bfloat16 float16 float32; do
52
+ export PROMPT="Once upon a time in a land far away"
53
+
54
+ for DEVICE in cpu cuda; do
55
+ for DTYPE in bfloat16 float16 float32; do
49
56
50
- python generate.py --dtype ${DTYPE} --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
51
- cat ./output_eager
52
- python generate.py --dtype ${DTYPE} --device cuda --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
53
- cat ./output_compiled
54
- python export.py --dtype ${DTYPE} --device cuda --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
55
- python generate.py --dtype ${DTYPE} --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
56
- cat ./output_aoti
57
+ python generate.py --dtype ${DTYPE} --device ${DEVICE} --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
58
+ .ci/scripts/check_gibberish ./output_eager
59
+ python generate.py --dtype ${DTYPE} --device ${DEVICE} --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
60
+ .ci/scripts/check_gibberish ./output_compiled
61
+ python export.py --dtype ${DTYPE} --device ${DEVICE} --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
62
+ python generate.py --dtype ${DTYPE} --device ${DEVICE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
63
+ .ci/scripts/check_gibberish ./output_aoti
64
+
65
+ ./cmake-out/aoti_run ${MODEL_DIR}/${MODEL_NAME}.so -z ${TOKENIZER_PATH} -i "${PROMPT}" > ./output_runner_aoti
66
+ cat ./output_runner_aoti
67
+ # .ci/scripts/check_gibberish ./output_runner_aoti --no-extract
57
68
58
69
echo "**********************************************"
59
70
echo "******** INT4 HQQ group-wise quantized *******"
60
71
echo "**********************************************"
61
- python generate.py --dtype ${DTYPE} --device cuda --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
62
- cat ./output_eager
63
- python generate.py --dtype ${DTYPE} --device cuda --compile --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
64
- cat ./output_compiled
65
- python export.py --dtype ${DTYPE} --device cuda --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
66
- python generate.py --dtype ${DTYPE} --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
67
- cat ./output_aoti
72
+ python generate.py --dtype ${DTYPE} --device ${DEVICE} --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
73
+ .ci/scripts/check_gibberish ./output_eager
74
+ python generate.py --dtype ${DTYPE} --device ${DEVICE} --compile --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
75
+ .ci/scripts/check_gibberish ./output_compiled
76
+ python export.py --dtype ${DTYPE} --device ${DEVICE} --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
77
+ python generate.py --dtype ${DTYPE} --device ${DEVICE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
78
+ .ci/scripts/check_gibberish ./output_aoti
79
+
80
+ ./cmake-out/aoti_run ${MODEL_DIR}/${MODEL_NAME}.so -z ${TOKENIZER_PATH} -i "${PROMPT}" > ./output_runner_aoti
81
+ cat ./output_runner_aoti
82
+ # .ci/scripts/check_gibberish ./output_runner_aoti --no-extract
68
83
84
+ done
69
85
done
70
86
71
87
echo "tests complete"
0 commit comments