Skip to content

Commit f6e7e93

Browse files
authored
Merge pull request #8 from l3utterfly/master
merged from upstream
2 parents 607bb9a + 7593639 commit f6e7e93

File tree

101 files changed

+9436
-6088
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+9436
-6088
lines changed

.github/workflows/bench.yml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,18 @@ jobs:
7979
sleep 0.1
8080
done
8181
82-
- name: Install k6
82+
- name: Set up Go
83+
uses: actions/setup-go@v5
84+
with:
85+
go-version: '1.21'
86+
87+
- name: Install k6 and xk6-sse
8388
id: k6_installation
8489
run: |
8590
cd examples/server/bench
86-
wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
87-
tar xzf k6*.tar.gz --strip-components=1
91+
go install go.k6.io/xk6/cmd/xk6@latest
92+
xk6 build master \
93+
--with github.com/phymbert/xk6-sse
8894
8995
- name: Build
9096
id: cmake_build
@@ -118,7 +124,7 @@ jobs:
118124
119125
cd examples/server/bench
120126
source venv/bin/activate
121-
BENCH_K6_BIN_PATH=./k6 python bench.py \
127+
python bench.py \
122128
--runner-label ${{ env.RUNNER_LABEL }} \
123129
--name ${{ github.job }} \
124130
--branch ${{ github.head_ref || github.ref_name }} \
@@ -228,9 +234,9 @@ jobs:
228234
<summary>Expand details for performance related PR only</summary>
229235
230236
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
231-
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
232-
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
233-
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
237+
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
238+
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
239+
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
234240
- ${{ env.BENCH_GRAPH_XLABEL }}
235241
236242

.github/workflows/build.yml

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
id: cmake_test
5353
run: |
5454
cd build
55-
ctest -L main --verbose --timeout 900
55+
ctest -L 'main|curl' --verbose --timeout 900
5656
5757
- name: Determine tag name
5858
id: tag
@@ -101,7 +101,9 @@ jobs:
101101
sysctl -a
102102
mkdir build
103103
cd build
104-
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
104+
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
105+
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
106+
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON ..
105107
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
106108
107109
- name: Test
@@ -209,21 +211,21 @@ jobs:
209211
id: depends
210212
run: |
211213
sudo apt-get update
212-
sudo apt-get install build-essential
214+
sudo apt-get install build-essential libcurl4-openssl-dev
213215
214216
- name: Build
215217
id: cmake_build
216218
run: |
217219
mkdir build
218220
cd build
219-
cmake .. -DLLAMA_FATAL_WARNINGS=ON
221+
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
220222
cmake --build . --config Release -j $(nproc)
221223
222224
- name: Test
223225
id: cmake_test
224226
run: |
225227
cd build
226-
ctest -L main --verbose --timeout 900
228+
ctest -L 'main|curl' --verbose --timeout 900
227229
228230
- name: Test llama2c conversion
229231
id: llama2c_test
@@ -938,6 +940,12 @@ jobs:
938940
- name: Download artifacts
939941
id: download-artifact
940942
uses: actions/download-artifact@v4
943+
with:
944+
path: ./artifact
945+
946+
- name: Move artifacts
947+
id: move_artifacts
948+
run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
941949

942950
- name: Create release
943951
id: create_release
@@ -956,15 +964,15 @@ jobs:
956964
const path = require('path');
957965
const fs = require('fs');
958966
const release_id = '${{ steps.create_release.outputs.id }}';
959-
for (let file of await fs.readdirSync('./artifact')) {
967+
for (let file of await fs.readdirSync('./artifact/release')) {
960968
if (path.extname(file) === '.zip') {
961969
console.log('uploadReleaseAsset', file);
962970
await github.repos.uploadReleaseAsset({
963971
owner: context.repo.owner,
964972
repo: context.repo.repo,
965973
release_id: release_id,
966974
name: file,
967-
data: await fs.readFileSync(`./artifact/${file}`)
975+
data: await fs.readFileSync(`./artifact/release/${file}`)
968976
});
969977
}
970978
}

.github/workflows/docker.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,20 @@ jobs:
9191
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
9292
fi
9393
94+
- name: Downcase github.repository_owner
95+
run: |
96+
echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
97+
env:
98+
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
99+
94100
- name: Build and push Docker image (versioned)
95101
if: github.event_name == 'push'
96102
uses: docker/build-push-action@v4
97103
with:
98104
context: .
99105
push: true
100106
platforms: ${{ matrix.config.platforms }}
101-
tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
107+
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
102108
file: ${{ matrix.config.dockerfile }}
103109

104110
- name: Build and push Docker image (tagged)
@@ -107,5 +113,5 @@ jobs:
107113
context: .
108114
push: ${{ github.event_name == 'push' }}
109115
platforms: ${{ matrix.config.platforms }}
110-
tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
116+
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
111117
file: ${{ matrix.config.dockerfile }}

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ models-mnt
4949
/convert-llama2c-to-ggml
5050
/embd-input-test
5151
/embedding
52+
/eval-callback
5253
/gguf
5354
/gguf-llama-simple
5455
/gguf-split

0 commit comments

Comments
 (0)