
Commit e98280b

Merge branch 'ggerganov:master' into add_beam_search
2 parents: 2107c84 + 785829d

31 files changed: 669 additions and 374 deletions

.gitignore

Lines changed: 4 additions & 1 deletion
@@ -16,6 +16,8 @@ build/
 build-em/
 build-debug/
 build-release/
+build-ci-debug/
+build-ci-release/
 build-static/
 build-cublas/
 build-opencl/
@@ -25,9 +27,10 @@ build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/
 out/
+tmp/
 
 models/*
-*.bin
+models-mnt
 
 /main
 /quantize

CMakeLists.txt

Lines changed: 35 additions & 1 deletion
@@ -186,7 +186,16 @@ if (LLAMA_BLAS)
             pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
         elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel")
             # all Intel* libraries share the same include path
-            pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
+            pkg_check_modules(DepBLAS mkl-sdl)
+            if (NOT DepBLAS)
+                if (BUILD_SHARED_LIBS)
+                    set(LINK_METHOD dynamic)
+                else()
+                    set(LINK_METHOD static)
+                endif()
+                string(REGEX REPLACE ".*_" "" DATA_TYPE_MODEL ${LLAMA_BLAS_VENDOR})
+                pkg_check_modules(DepBLAS REQUIRED mkl-${LINK_METHOD}-${DATA_TYPE_MODEL}-iomp)
+            endif()
         elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC")
            # this doesn't provide pkg-config
            # suggest to assign BLAS_INCLUDE_DIRS on your own
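For illustration only, a rough sketch of what this fallback resolves to, assuming Intel oneMKL's pkg-config files are on `PKG_CONFIG_PATH` and `LLAMA_BLAS_VENDOR` is set to something like `Intel10_lp64` (as in the README change below):

```bash
# Sketch: mimic the new CMake fallback outside of CMake to see which
# pkg-config module it would request. The vendor value is an assumed example.
vendor="Intel10_lp64"
suffix="${vendor##*_}"                  # same effect as string(REGEX REPLACE ".*_" "" ...) -> "lp64"
module="mkl-dynamic-${suffix}-iomp"     # BUILD_SHARED_LIBS=ON case; static builds would use mkl-static-...

if ! pkg-config --exists mkl-sdl; then
    pkg-config --exists "$module" \
        && pkg-config --cflags --libs "$module" \
        || echo "neither mkl-sdl nor $module found; check PKG_CONFIG_PATH for oneMKL"
fi
```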
@@ -512,6 +521,7 @@ if (BUILD_SHARED_LIBS)
     set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
     add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
     target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
+    install(TARGETS ggml_shared LIBRARY)
 endif()
 
 add_library(llama
@@ -533,8 +543,32 @@ if (BUILD_SHARED_LIBS)
     if (LLAMA_METAL)
         set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
     endif()
+    install(TARGETS llama LIBRARY)
 endif()
 
+include(GNUInstallDirs)
+install(
+    FILES convert.py
+    PERMISSIONS
+        OWNER_READ
+        OWNER_WRITE
+        OWNER_EXECUTE
+        GROUP_READ
+        GROUP_EXECUTE
+        WORLD_READ
+        WORLD_EXECUTE
+    DESTINATION ${CMAKE_INSTALL_BINDIR})
+install(
+    FILES convert-lora-to-ggml.py
+    PERMISSIONS
+        OWNER_READ
+        OWNER_WRITE
+        OWNER_EXECUTE
+        GROUP_READ
+        GROUP_EXECUTE
+        WORLD_READ
+        WORLD_EXECUTE
+    DESTINATION ${CMAKE_INSTALL_BINDIR})
 
 #
 # programs, examples and tests
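A hedged sketch of how these new install rules could be exercised locally; the build directory and the `~/.local` prefix are arbitrary choices, not something the diff prescribes:

```bash
# Configure, build and install into a user-local prefix (example values).
cmake -S . -B build -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
cmake --build build -j
cmake --install build --prefix "$HOME/.local"

# With default GNUInstallDirs values this should yield roughly:
#   $HOME/.local/lib/libllama.so and libggml_shared.so      (LIBRARY targets)
#   $HOME/.local/bin/convert.py and convert-lora-to-ggml.py (installed executable)
```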

README.md

Lines changed: 1 addition & 1 deletion
@@ -360,7 +360,7 @@ Building the program with BLAS support may lead to some performance improvements
 ```bash
 mkdir build
 cd build
-cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_lp64 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 cmake --build . --config Release
 ```

ci/README.md

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+# CI
+
+In addition to [Github Actions](https://github.com/ggerganov/llama.cpp/actions), `llama.cpp` uses a custom CI framework:
+
+https://github.com/ggml-org/ci
+
+It monitors the `master` branch for new commits and runs the
+[ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
+to execute heavier workloads compared to just using Github Actions. Over time, the cloud instances will also be scaled
+to cover various hardware architectures, including GPU and Apple Silicon instances.
+
+Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
+Only the branches of this repo are monitored for this keyword.
+
+It is good practice, before publishing changes, to execute the full CI locally on your machine:
+
+```bash
+mkdir tmp
+bash ./ci/run.sh ./tmp/results ./tmp/mnt
+```
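A purely illustrative example of the trigger described above; the commit message text is made up, only the `ggml-ci` keyword is meaningful, and it works only on branches of the repo itself:

```bash
# Hypothetical commit that asks the custom CI to pick up this push.
git commit -m "cmake : install targets (ggml-ci)"
git push origin add_beam_search
```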

ci/run.sh

Lines changed: 262 additions & 0 deletions
@@ -0,0 +1,262 @@
+#!/bin/bash
+
+if [ -z "$2" ]; then
+    echo "usage: $0 <output-dir> <mnt-dir>"
+    exit 1
+fi
+
+mkdir -p "$1"
+mkdir -p "$2"
+
+OUT=$(realpath "$1")
+MNT=$(realpath "$2")
+
+rm -v $OUT/*.log
+rm -v $OUT/*.exit
+rm -v $OUT/*.md
+
+sd=`dirname $0`
+cd $sd/../
+SRC=`pwd`
+
+## helpers
+
+# download a file if it does not exist or if it is outdated
+function gg_wget {
+    local out=$1
+    local url=$2
+
+    local cwd=`pwd`
+
+    mkdir -p $out
+    cd $out
+
+    # should not re-download if file is the same
+    wget -nv -N $url
+
+    cd $cwd
+}
+
+function gg_printf {
+    printf -- "$@" >> $OUT/README.md
+}
+
+function gg_run {
+    ci=$1
+
+    set -o pipefail
+    set -x
+
+    gg_run_$ci | tee $OUT/$ci.log
+    cur=$?
+    echo "$cur" > $OUT/$ci.exit
+
+    set +x
+    set +o pipefail
+
+    gg_sum_$ci
+
+    ret=$((ret | cur))
+}
+
+## ci
+
+# ctest_debug
+
+function gg_run_ctest_debug {
+    cd ${SRC}
+
+    rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug
+
+    set -e
+
+    (time cmake -DCMAKE_BUILD_TYPE=Debug .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+
+    (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+
+    set +e
+}
+
+function gg_sum_ctest_debug {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs ctest in debug mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
+    gg_printf '```\n'
+    gg_printf '\n'
+}
+
+# ctest_release
+
+function gg_run_ctest_release {
+    cd ${SRC}
+
+    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
+
+    set -e
+
+    (time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+
+    if [ -z $GG_BUILD_LOW_PERF ]; then
+        (time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+    else
+        (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+    fi
+
+    set +e
+}
+
+function gg_sum_ctest_release {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs ctest in release mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
+    gg_printf '```\n'
+}
+
+# open_llama_3b_v2
+
+function gg_run_open_llama_3b_v2 {
+    cd ${SRC}
+
+    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json
+    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model
+    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json
+    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json
+    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
+    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
+
+    gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
+    unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
+    head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw
+
+    path_models="../models-mnt/open-llama/3B-v2"
+    path_wiki="../models-mnt/wikitext/wikitext-2-raw"
+
+    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
+
+    set -e
+
+    (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+
+    python3 ../convert.py ${path_models}
+
+    model_f16="${path_models}/ggml-model-f16.bin"
+    model_q8_0="${path_models}/ggml-model-q8_0.bin"
+    model_q4_0="${path_models}/ggml-model-q4_0.bin"
+    model_q4_1="${path_models}/ggml-model-q4_1.bin"
+    model_q5_0="${path_models}/ggml-model-q5_0.bin"
+    model_q5_1="${path_models}/ggml-model-q5_1.bin"
+    model_q3_k="${path_models}/ggml-model-q3_k.bin"
+    model_q4_k="${path_models}/ggml-model-q4_k.bin"
+    model_q5_k="${path_models}/ggml-model-q5_k.bin"
+    model_q6_k="${path_models}/ggml-model-q6_k.bin"
+
+    wiki_test_60="${path_wiki}/wiki.test-60.raw"
+
+    ./bin/quantize ${model_f16} ${model_q8_0} q8_0
+    ./bin/quantize ${model_f16} ${model_q4_0} q4_0
+    ./bin/quantize ${model_f16} ${model_q4_1} q4_1
+    ./bin/quantize ${model_f16} ${model_q5_0} q5_0
+    ./bin/quantize ${model_f16} ${model_q5_1} q5_1
+    ./bin/quantize ${model_f16} ${model_q3_k} q3_k
+    ./bin/quantize ${model_f16} ${model_q4_k} q4_k
+    ./bin/quantize ${model_f16} ${model_q5_k} q5_k
+    ./bin/quantize ${model_f16} ${model_q6_k} q6_k
+
+    (time ./bin/main --model ${model_f16} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
+    (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
+    (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
+    (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
+    (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
+    (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
+    (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
+    (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+
+    (time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
+    (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
+    (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
+    (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
+    (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
+    (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
+    (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
+    (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+
+    function check_ppl {
+        qnt="$1"
+        ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
+
+        if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
+            printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
+            return 20
+        fi
+
+        printf ' - %s @ %s OK\n' "$qnt" "$ppl"
+        return 0
+    }
+
+    check_ppl "f16"  "$(cat $OUT/${ci}-tg-f16.log  | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+
+    set +e
+}
+
+function gg_sum_open_llama_3b_v2 {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'OpenLLaMA 3B-v2:\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
+    gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
+    gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
+    gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
+    gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
+    gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
+    gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
+    gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
+    gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
+    gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
+    gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
+}
+
+## main
+
+if [ -z $GG_BUILD_LOW_PERF ]; then
+    rm -rf ${SRC}/models-mnt
+
+    mnt_models=$(realpath ${MNT}/models)
+    mkdir -p ${mnt_models}
+    ln -sfn ${mnt_models} ${SRC}/models-mnt
+
+    python3 -m pip install -r ${SRC}/requirements.txt
+fi
+
+ret=0
+
+#test $ret -eq 0 && gg_run ctest_debug
+#test $ret -eq 0 && gg_run ctest_release
+
+if [ -z $GG_BUILD_LOW_PERF ]; then
+    test $ret -eq 0 && gg_run open_llama_3b_v2
+fi
+
+exit $ret
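For reference, a sketch of the two ways this script is likely meant to be invoked, based on the `GG_BUILD_LOW_PERF` checks above; any non-empty value triggers the low-performance path, `1` is just an example:

```bash
# Full run: links models-mnt to the mount dir, installs the Python requirements
# and runs the OpenLLaMA 3B-v2 job (convert, quantize, generate, perplexity).
mkdir -p tmp
bash ./ci/run.sh ./tmp/results ./tmp/mnt

# Low-performance machines: skips the model download and the OpenLLaMA job
# (in this revision the ctest jobs are still commented out in main).
GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```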

convert-lora-to-ggml.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 import json
 import os
 import re

convert.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 import argparse
 import concurrent.futures
 import copy
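Together with the mode change to 100755, the new shebang lets the conversion scripts be invoked directly; a small sketch, where the model directory path is only an example:

```bash
# Before: go through the interpreter explicitly.
python3 convert.py models/7B/

# After this change the script is executable in place (example path).
./convert.py models/7B/
```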

examples/baby-llama/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 set(TARGET baby-llama)
 add_executable(${TARGET} baby-llama.cpp)
+install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)

examples/benchmark/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 set(TARGET benchmark)
 add_executable(${TARGET} benchmark-matmult.cpp)
+install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
