@@ -128,15 +128,20 @@ function gg_run_open_llama_3b_v2 {
128
128
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
129
129
gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
130
130
131
+ gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
132
+ unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
133
+ head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw
134
+
135
+ path_models=" ../models-mnt/open-llama/3B-v2"
136
+ path_wiki=" ../models-mnt/wikitext/wikitext-2-raw"
137
+
131
138
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
132
139
133
140
set -e
134
141
135
142
(time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a $OUT /${ci} -cmake.log
136
143
(time make -j ) 2>&1 | tee -a $OUT /${ci} -make.log
137
144
138
- path_models=" ../models-mnt/open-llama/3B-v2"
139
-
140
145
python3 ../convert.py ${path_models}
141
146
142
147
model_f16=" ${path_models} /ggml-model-f16.bin"
@@ -150,6 +155,8 @@ function gg_run_open_llama_3b_v2 {
150
155
model_q5_k=" ${path_models} /ggml-model-q5_k.bin"
151
156
model_q6_k=" ${path_models} /ggml-model-q6_k.bin"
152
157
158
+ wiki_test_60=" ${path_wiki} /wiki.test-60.raw"
159
+
153
160
./bin/quantize ${model_f16} ${model_q8_0} q8_0
154
161
./bin/quantize ${model_f16} ${model_q4_0} q4_0
155
162
./bin/quantize ${model_f16} ${model_q4_1} q4_1
@@ -160,16 +167,27 @@ function gg_run_open_llama_3b_v2 {
160
167
./bin/quantize ${model_f16} ${model_q5_k} q5_k
161
168
./bin/quantize ${model_f16} ${model_q6_k} q6_k
162
169
163
- (time ./bin/main --model ${model_f16} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
164
- (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
165
- (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
166
- (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
167
- (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
168
- (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
169
- (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
170
- (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
171
- (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
172
- (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -t 8 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
170
+ (time ./bin/main --model ${model_f16} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
171
+ (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
172
+ (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
173
+ (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
174
+ (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
175
+ (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
176
+ (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
177
+ (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
178
+ (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
179
+ (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
180
+
181
+ (time ./bin/perplexity --model ${model_f16} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
182
+ (time ./bin/perplexity --model ${model_q8_0} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
183
+ (time ./bin/perplexity --model ${model_q4_0} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
184
+ (time ./bin/perplexity --model ${model_q4_1} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
185
+ (time ./bin/perplexity --model ${model_q5_0} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
186
+ (time ./bin/perplexity --model ${model_q5_1} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
187
+ (time ./bin/perplexity --model ${model_q3_k} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
188
+ (time ./bin/perplexity --model ${model_q4_k} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
189
+ (time ./bin/perplexity --model ${model_q5_k} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
190
+ (time ./bin/perplexity --model ${model_q6_k} -n 64 -f ${wiki_test_60} -c 2048 ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
173
191
174
192
set +e
175
193
}
0 commit comments