Skip to content

Commit b4ac339

Browse files
mikekgfbmalfet
authored and committed
add aoti c/c++ runner to hqq tests; check output for gibberish using spell (#824)
* add runner to hqq tests * replace cat with a gibberish check * typo * create script to check for gibberish * update gibberish check * update gibberish check * use variable for tokenizer path * aspell dictionaries for english * exclude device name from gibberish check * handle JIT time line * handle Warning: * grep update * fix line exclusion * remove warning which causes gibberish check fail * add sequence extraction for principled handling of perf info and user messages * typo * change output to pass spell check * updates * handle runner which does not have sequence delimiters b/c does not need sequence extraction * add updated workflow yml * typo * native runner weirdness * remove secrets * don't log in for GGUF open_orca model
1 parent 9f72257 commit b4ac339

File tree

7 files changed

+97
-20
lines changed

7 files changed

+97
-20
lines changed

.ci/scripts/check_gibberish

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#! /bin/bash
2+
3+
#!/bin/bash
4+
# Check the spelling of the specified file
5+
cat "$1"
6+
7+
TMPFILE=/tmp/`basename "$1"`-sequence
8+
9+
if [ "X$2" == "X--no-extract" ]; then
10+
cp "$1" $TMPFILE
11+
else
12+
# We extract only the sequence output and don't spell check status and performance stats
13+
python3 .ci/scripts/extract-sequence.py "$1" >$TMPFILE
14+
15+
if [ $? -ne 0 ]; then
16+
echo "Sequence extraction failed. Exiting."
17+
exit 1
18+
fi
19+
fi
20+
21+
cat ${TMPFILE} | aspell -a -c | grep '^[\&#]' >/tmp/out.$$
22+
# Exit with a non-zero status code if there were any spelling errors because:
23+
# * Finding one or more lines with & or # means we found a spelling error, might be gibberish
24+
if [ $? -ne 0 ]; then
25+
echo "No spelling errors found; likely correct operation. Success."
26+
exit 0
27+
fi
28+
cat /tmp/out.$$
29+
echo "Spelling errors found; might indicate garbage output. Failing."
30+
exit 1

.ci/scripts/extract-sequence.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
import sys


def print_until_equals(filename):
    """Print only the generated-sequence portion of *filename* to stdout.

    The sequence is delimited by a line beginning with eight dashes
    (start marker, itself printed) and a line beginning with eight
    equals signs (end marker, not printed).  Everything outside the
    markers — status messages, performance stats — is skipped so that
    only the model's generated text gets spell-checked downstream.

    Exits with status 1 if a second end-of-sequence marker is seen or
    if no complete sequence is found at all.
    """
    output = False       # True while we are inside the delimited sequence
    past_output = False  # True once a complete sequence has been emitted
    with open(filename, "r") as f:
        for line in f:
            if line.startswith("-" * 8):
                output = True
            if output and line.startswith("=" * 8):
                if past_output:
                    print("Double end-of-sequence line")
                    sys.exit(1)
                past_output = True
                output = False
            if output:
                # `line` already ends with "\n"; suppress print's own
                # newline so the output is not double-spaced.
                print(line, end="")

    if not past_output:
        # Fixed message: previously read "Did find sequence to output".
        print("Did not find sequence to output")
        sys.exit(1)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python scriptname.py filename")
        sys.exit(1)
    filename = sys.argv[1]
    print_until_equals(filename)

.github/workflows/hqq-dtype.yml

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ on:
88
workflow_dispatch:
99

1010
jobs:
11-
test-cuda:
11+
test-hqq:
1212
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
1313
with:
1414
runner: linux.g5.4xlarge.nvidia.gpu
@@ -28,8 +28,11 @@ jobs:
2828
echo "::group::Download checkpoints"
2929
# Install requirements
3030
./install_requirements.sh cuda
31+
bash scripts/build_native.sh aoti
3132
pip3 list
3233
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
34+
# needed to check for gibberish
35+
yum install -y aspell aspell-en
3336
echo "::endgroup::"
3437
3538
echo "::group::Download checkpoints"
@@ -42,30 +45,43 @@ jobs:
4245
4346
echo "::group::Run inference"
4447
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
48+
export TOKENIZER_PATH=checkpoints/stories15M/tokenizer.model
4549
export MODEL_NAME=stories15M
4650
export MODEL_DIR=/tmp
4751
48-
for DTYPE in bfloat16 float16 float32; do
52+
export PROMPT="Once upon a time in a land far away"
53+
54+
for DEVICE in cpu cuda; do
55+
for DTYPE in bfloat16 float16 float32; do
4956
50-
python generate.py --dtype ${DTYPE} --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
51-
cat ./output_eager
52-
python generate.py --dtype ${DTYPE} --device cuda --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
53-
cat ./output_compiled
54-
python export.py --dtype ${DTYPE} --device cuda --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
55-
python generate.py --dtype ${DTYPE} --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
56-
cat ./output_aoti
57+
python generate.py --dtype ${DTYPE} --device ${DEVICE} --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
58+
.ci/scripts/check_gibberish ./output_eager
59+
python generate.py --dtype ${DTYPE} --device ${DEVICE} --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
60+
.ci/scripts/check_gibberish ./output_compiled
61+
python export.py --dtype ${DTYPE} --device ${DEVICE} --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
62+
python generate.py --dtype ${DTYPE} --device ${DEVICE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
63+
.ci/scripts/check_gibberish ./output_aoti
64+
65+
./cmake-out/aoti_run ${MODEL_DIR}/${MODEL_NAME}.so -z ${TOKENIZER_PATH} -i "${PROMPT}" > ./output_runner_aoti
66+
cat ./output_runner_aoti
67+
# .ci/scripts/check_gibberish ./output_runner_aoti --no-extract
5768
5869
echo "**********************************************"
5970
echo "******** INT4 HQQ group-wise quantized *******"
6071
echo "**********************************************"
61-
python generate.py --dtype ${DTYPE} --device cuda --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
62-
cat ./output_eager
63-
python generate.py --dtype ${DTYPE} --device cuda --compile --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
64-
cat ./output_compiled
65-
python export.py --dtype ${DTYPE} --device cuda --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
66-
python generate.py --dtype ${DTYPE} --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
67-
cat ./output_aoti
72+
python generate.py --dtype ${DTYPE} --device ${DEVICE} --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
73+
.ci/scripts/check_gibberish ./output_eager
74+
python generate.py --dtype ${DTYPE} --device ${DEVICE} --compile --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
75+
.ci/scripts/check_gibberish ./output_compiled
76+
python export.py --dtype ${DTYPE} --device ${DEVICE} --quant '{"linear:hqq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
77+
python generate.py --dtype ${DTYPE} --device ${DEVICE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
78+
.ci/scripts/check_gibberish ./output_aoti
79+
80+
./cmake-out/aoti_run ${MODEL_DIR}/${MODEL_NAME}.so -z ${TOKENIZER_PATH} -i "${PROMPT}" > ./output_runner_aoti
81+
cat ./output_runner_aoti
82+
# .ci/scripts/check_gibberish ./output_runner_aoti --no-extract
6883
84+
done
6985
done
7086
7187
echo "tests complete"

.github/workflows/run-readme-pr.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,4 @@ jobs:
244244
echo "tests complete"
245245
echo "*******************************************"
246246
echo "::endgroup::"
247+

build/builder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,7 @@ def _initialize_model(
441441

442442
model.to(dtype=builder_args.precision)
443443

444+
print("-----------------------------------------------------------")
444445
return model
445446

446447

generate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,7 @@ def callback(x):
748748
aggregate_metrics["tokens_per_sec"].append(tokens_sec)
749749

750750
if jit_compile:
751-
print(f"JIT compilation time (incl runtime): {compilation_time:.2} seconds")
751+
print(f"just-in-time compilation time (incl run time): {compilation_time:.2} seconds")
752752
# Don't continue here.... because we need to report and reset
753753
# continue
754754

quantize.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -550,9 +550,9 @@ def quantize(self, module):
550550
inner_k_tiles=self.inner_k_tiles,
551551
):
552552
if self.padding_allowed:
553-
print(
554-
f"warning: {name} is padded to satisfy in_features % 1024 == 0"
555-
)
553+
# print(
554+
# f"warning: {name} is padded to satisfy in_features % 1024 == 0"
555+
# )
556556
padded_in_features = find_multiple(in_features, 1024)
557557
weight = F.pad(
558558
weight, pad=(0, padded_in_features - in_features)

0 commit comments

Comments
 (0)