
Commit ea0c733

conflict
1 parent 184bd1b commit ea0c733

2 files changed: +134 -1 lines changed

.ci/scripts/gather_test_models.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@
 
 JOB_RUNNERS = {
     "cpu": {
-        "16-core-ubuntu": "x86_64",
+        "8-core-ubuntu": "x86_64",
         # "macos-12": "x86_64",  # not working for compile and ExecuTorch yet
         "macos-14": "aarch64",
     },
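
For orientation, a minimal sketch of how a runner map like this is typically consumed when emitting a CI matrix. The gather_models helper below is hypothetical (only the JOB_RUNNERS shape comes from the diff above):

    import json

    # Runner-label -> architecture map, as in .ci/scripts/gather_test_models.py.
    JOB_RUNNERS = {
        "cpu": {
            "8-core-ubuntu": "x86_64",
            "macos-14": "aarch64",
        },
    }

    def gather_models(device: str = "cpu") -> str:
        """Hypothetical helper: emit a GitHub Actions matrix 'include'
        list pairing each runner label with its architecture."""
        include = [
            {"runner": runner, "arch": arch}
            for runner, arch in JOB_RUNNERS[device].items()
        ]
        return json.dumps({"include": include})

    if __name__ == "__main__":
        # e.g. {"include": [{"runner": "8-core-ubuntu", "arch": "x86_64"}, ...]}
        print(gather_models())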

.github/workflows/et-gguf.yml

Lines changed: 133 additions & 0 deletions
@@ -0,0 +1,133 @@
name: Compile main

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  run-tinystories:
    strategy:
      matrix:
        runner: [16-core-ubuntu]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip packages"
          pip install wheel
          pip install cmake
          pip install ninja
          pip install zstd
          pip install -r requirements.txt

          echo "Executorch: cloning"
          mkdir etorch
          cd etorch
          git clone https://github.com/pytorch/executorch.git
          cd executorch
          echo "Inside: ${PWD}"

          echo "Executorch: submodule update"
          git submodule sync
          git submodule update --init

          echo "Executorch: installing python interface"
          ./install_requirements.sh --pybind xnnpack
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'

          cd ../..
          echo "Inside: ${PWD}"
      - name: Install GGUF requirements
        run: |
          echo "Installing pip packages"
          pip install gguf

          git clone https://github.com/ggerganov/llama.cpp.git
          pushd llama.cpp
          make
          popd
      - name: Download GGUF files
        run: |
          mkdir gguf_files
          wget -O gguf_files/llama-2-7b.Q4_0.gguf "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf?download=true"
          ./llama.cpp/quantize --allow-requantize gguf_files/llama-2-7b.Q4_0.gguf gguf_files/llama-2-7b.Q4_0.requant_F32.gguf F32
          wget -O gguf_files/tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
      - name: Run inference
        run: |
          export GGUF_PATH=${PWD}/gguf_files/llama-2-7b.Q4_0.gguf
          export TOKENIZER_PATH=${PWD}/gguf_files/tokenizer.model
          export MODEL_NAME=llama-2-7b_Q4_0_gguf

          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 > ${PWD}/output_eager
          cat ${PWD}/output_eager

          python export.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte > ${PWD}/output_et
          cat ${PWD}/output_et

          echo "Tests complete."

      - name: Run inference with quantized models
        run: |
          export MODEL_DIR=/tmp
          export GGUF_PATH=${PWD}/gguf_files/llama-2-7b.Q4_0.gguf
          export TOKENIZER_PATH=${PWD}/gguf_files/tokenizer.model
          export MODEL_NAME=llama-2-7b_Q4_0_gguf

          python export.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******* Emb: channel-wise quantized ******"
          echo "******************************************"
          python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******** Emb: group-wise quantized *******"
          echo "******************************************"
          python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******* INT8 channel-wise quantized ******"
          echo "******************************************"
          python export.py --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******** INT8 group-wise quantized *******"
          echo "******************************************"
          python export.py --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          cat ./output_et

          echo "******************************************"
          echo "******** INT4 group-wise quantized *******"
          echo "******************************************"
          # python export.py --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          # python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
          # cat ./output_et

          echo "tests complete"
          echo "******************************************"

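Relatedly, the downloaded model can be sanity-checked from Python before CI spends time on export and inference. A short sketch, assuming the GGUFReader API of the gguf package installed above (this check is illustrative, not part of the commit):

    from gguf import GGUFReader  # provided by `pip install gguf`

    # Assumed path from the "Download GGUF files" step.
    reader = GGUFReader("gguf_files/llama-2-7b.Q4_0.gguf")

    # Print a few metadata fields and tensor summaries to confirm the
    # file parses as valid GGUF.
    for name in list(reader.fields)[:5]:
        print("field:", name)
    for tensor in reader.tensors[:5]:
        print("tensor:", tensor.name, tensor.shape, tensor.tensor_type)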