Skip to content

Commit 7a2c926

Browse files
authored
ci: bench: add more ftype, fix triggers and bot comment (#6466)
* ci: bench: change trigger path to not spawn on each PR
* ci: bench: add more file type for phi-2: q8_0 and f16.
  - do not show the comment by default
* ci: bench: add seed parameter in k6 script
* ci: bench: artefact name perf job
* Add iteration in the commit status, reduce again the autocomment
* ci: bench: add per slot metric in the commit status
* Fix trailing spaces
1 parent 4bcd6b9 commit 7a2c926

File tree

3 files changed

+39
-17
lines changed

3 files changed

+39
-17
lines changed

.github/workflows/bench.yml

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ on:
2424
push:
2525
branches:
2626
- master
27-
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
27+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
2828
pull_request_target:
2929
types: [opened, synchronize, reopened]
30-
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
30+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
3131
schedule:
3232
- cron: '04 2 * * *'
3333

@@ -42,6 +42,16 @@ jobs:
4242
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
4343
N_USERS: 8
4444
DURATION: 10m
45+
46+
strategy:
47+
matrix:
48+
model: [phi-2]
49+
ftype: [q4_0, q8_0, f16]
50+
include:
51+
- model: phi-2
52+
ftype: q4_0
53+
pr_comment_enabled: "true"
54+
4555
if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }}
4656
steps:
4757
- name: Clone
@@ -116,7 +126,7 @@ jobs:
116126
--scenario script.js \
117127
--duration ${{ github.event.inputs.duration || env.DURATION }} \
118128
--hf-repo ggml-org/models \
119-
--hf-file phi-2/ggml-model-q4_0.gguf \
129+
--hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
120130
--model-path-prefix /models \
121131
--parallel ${{ env.N_USERS }} \
122132
-ngl 33 \
@@ -134,7 +144,7 @@ jobs:
134144
135145
- uses: actions/upload-artifact@v4
136146
with:
137-
name: benchmark-results
147+
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
138148
compression-level: 9
139149
path: |
140150
examples/server/bench/*.jpg
@@ -146,7 +156,7 @@ jobs:
146156
with:
147157
authToken: ${{secrets.GITHUB_TOKEN}}
148158
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
149-
context: bench-server-baseline
159+
context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
150160
description: |
151161
${{ env.BENCH_RESULTS }}
152162
state: 'success'
@@ -203,21 +213,26 @@ jobs:
203213
- name: Comment PR
204214
uses: mshick/add-pr-comment@v2
205215
id: comment_pr
206-
if: ${{ github.event.pull_request != '' }}
216+
if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
207217
with:
208-
message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
218+
message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
209219
message: |
210-
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
220+
<p align="center">
221+
222+
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
223+
224+
</p>
225+
226+
<details>
227+
228+
<summary>Expand details for performance related PR only</summary>
211229
212230
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
213231
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
214232
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
215233
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
216234
- ${{ env.BENCH_GRAPH_XLABEL }}
217235
218-
<details>
219-
220-
<summary>Time series</summary>
221236
222237
<p align="center">
223238

examples/server/bench/bench.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import matplotlib.dates
1717
import matplotlib.pyplot as plt
1818
import requests
19+
from statistics import mean
1920

2021

2122
def main(args_in: list[str] | None = None) -> None:
@@ -109,6 +110,7 @@ def main(args_in: list[str] | None = None) -> None:
109110

110111
# Prometheus
111112
end_time = time.time()
113+
prometheus_metrics = {}
112114
if is_server_listening("0.0.0.0", 9090):
113115
metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
114116
'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']
@@ -127,6 +129,7 @@ def main(args_in: list[str] | None = None) -> None:
127129
values = metric_data['data']['result'][0]['values']
128130
timestamps, metric_values = zip(*values)
129131
metric_values = [float(value) for value in metric_values]
132+
prometheus_metrics[metric] = metric_values
130133
timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
131134
plt.figure(figsize=(16, 10), dpi=80)
132135
plt.plot(timestamps_dt, metric_values, label=metric)
@@ -176,17 +179,20 @@ def main(args_in: list[str] | None = None) -> None:
176179

177180
# 140 chars max for commit status description
178181
bench_results = {
182+
"i": iterations,
179183
"req": {
180-
"p90": data['metrics']["http_req_duration"]["p(90)"],
181-
"avg": data['metrics']["http_req_duration"]["avg"],
184+
"p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
185+
"avg": round(data['metrics']["http_req_duration"]["avg"], 2),
182186
},
183187
"pp": {
184-
"p90": data['metrics']["llamacpp_prompt_tokens"]["p(90)"],
185-
"avg": data['metrics']["llamacpp_prompt_tokens"]["avg"],
188+
"p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2),
189+
"avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2),
190+
"0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
186191
},
187192
"tg": {
188-
"p90": data['metrics']["llamacpp_tokens_second"]["p(90)"],
189-
"avg": data['metrics']["llamacpp_tokens_second"]["avg"],
193+
"p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
194+
"avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
195+
"0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
190196
},
191197
}
192198
with open("results.github.env", 'a') as github_env:

examples/server/bench/script.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ export default function () {
8787
],
8888
"model": model,
8989
"stream": false,
90+
"seed": 42,
9091
"max_tokens": max_tokens
9192
}
9293

0 commit comments

Comments
 (0)