Skip to content

Commit 3b224b1

Browse files
committed
server: bench: init
1 parent 43139cc commit 3b224b1

File tree

5 files changed

+448
-9
lines changed

5 files changed

+448
-9
lines changed

.github/workflows/bench.yml

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
# Benchmark
name: Benchmark

on:
  # Manual trigger; the caller must pick one of the listed Azure GPU series.
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC64as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC48ads_A100_v4
          - Standard_ND96asr_A100_v4
          - Standard_NC40ads_H100_v5
          - Standard_NC80adis_H100_v5

  # Pushes to the listed branches, only when build or benchmark files change.
  push:
    branches:
      - master
      - hp/server/bench/workflow # FIXME remove
    paths:
      - '.github/workflows/bench.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/bench/**.*'

  # Pull requests touching the same set of paths as the push trigger.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - '.github/workflows/bench.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/bench/**.*'

  # Scheduled run: minute 04 of hour 2, every day.
  schedule:
    - cron: '04 2 * * *'
29+
30+
# Runs are grouped per workflow and git ref; starting a new run in a group
# cancels the run of that group that is still in progress.
concurrency:
  cancel-in-progress: true
  group: '${{ github.workflow }}-${{ github.ref }}'
33+
34+
jobs:
  # Builds the CUDA-enabled server, drives it with a k6 scenario through
  # bench.py, then publishes the results as a commit status and, on pull
  # requests, as an image annotation.
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
    # if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.event.push.ref == 'refs/heads/master' }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          # 0 fetches the full history instead of a shallow clone.
          fetch-depth: 0

      # Temporary debug output for the job-level `if` that is commented out above.
      # NOTE(review): the push payload exposes the ref as `github.event.ref`;
      # `github.event.push.ref` looks like it always evaluates to an empty
      # string - confirm before enabling the condition.
      - name: TMP
        id: tmp
        run: |
          echo IF: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.event.push.ref == 'refs/heads/master' }}
          echo github.event.inputs.gpu-series=${{ github.event.inputs.gpu-series }}
          echo github.event.pull_request=${{ github.event.pull_request }}
          echo github.event.push.ref=${{ github.event.push.ref }}
          echo github.event.schedule=${{ github.event.schedule }}

      # Creates the virtualenv that the "Server bench" step activates later.
      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          # Bounded wait for port 9090: fail this step after 60s instead of
          # spinning until the job timeout if Prometheus never comes up.
          timeout 60 bash -c 'while ! nc -z localhost 9090; do sleep 0.1; done'

      # Unpacks the k6 binary next to bench.py (used below as ./k6).
      - name: Install k6
        id: k6_installation
        run: |
          cd examples/server/bench
          wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
          tar xzf k6*.tar.gz --strip-components=1

      # Release build of the `server` target only, with cuBLAS enabled.
      - name: Build
        id: cmake_build
        run: |
          set -eux
          mkdir build
          cd build
          cmake .. \
              -DLLAMA_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DLLAMA_CUBLAS=ON \
              -DCUDAToolkit_ROOT=/usr/local/cuda \
              -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
              -DCMAKE_CUDA_ARCHITECTURES=75 \
              -DLLAMA_FATAL_WARNINGS=OFF \
              -DLLAMA_ALL_WARNINGS=OFF \
              -DCMAKE_BUILD_TYPE=Release;
          cmake --build . --config Release -j $(nproc) --target server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      - name: Server bench
        id: server_bench
        env:
          # The ref name is handed over through the environment rather than
          # interpolated into the script, so shell metacharacters in a branch
          # name cannot be executed as commands.
          BENCH_BRANCH: ${{ github.ref_name }}
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          BENCH_K6_BIN_PATH=./k6 python bench.py \
              --runner-label ${{ env.RUNNER_LABEL }} \
              --name ${{ github.job }} \
              --branch "$BENCH_BRANCH" \
              --commit ${{ github.sha }} \
              --scenario script.js \
              --duration 30s \
              --hf-repo ggml-org/models \
              --hf-file phi-2/ggml-model-q4_0.gguf \
              --model-path-prefix /models \
              --parallel 8 \
              -ngl 33 \
              --batch-size 2048 \
              --ubatch-size 256 \
              --ctx-size 16384 \
              --n-prompts 1000 \
              --max-prompt-tokens 1024 \
              --max-tokens 2048

          # Export the variables written by bench.py to the following steps.
          cat results.github.env >> "$GITHUB_ENV"

      # - name: Comment PR
      #   uses: mshick/add-pr-comment@v2
      #   id: comment_pr
      #   if: ${{ github.event.pull_request != '' }}
      #   with:
      #     message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
      #     message: |
      #       $BENCH_PR_COMMENT

      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          authToken: ${{ secrets.GITHUB_TOKEN }}
          # On pull requests github.sha is the merge commit; report on the PR
          # head commit so the status shows up on the pull request.
          sha: ${{ github.event.pull_request.head.sha || github.sha }}
          context: ${{ github.job }}
          # `with:` values are not shell-expanded, so the variable has to be
          # read through the env context.
          # Assumes results.github.env defines BENCH_RESULTS - TODO confirm.
          description: |
            ${{ env.BENCH_RESULTS }}
          state: 'success'

      - name: Upload results
        if: ${{ github.event.pull_request != '' }}
        uses: edunad/actions-image@v2.0.0
        with:
          # NOTE(review): bench.py runs from examples/server/bench; confirm the
          # PNG files really end up in the workspace root matched by this glob.
          path: '*.png'
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          title: |
            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
          annotationLevel: 'success'

0 commit comments

Comments
 (0)