Skip to content

Commit 607bb9a

Browse files
authored
Merge pull request #7 from l3utterfly/master
merge from upstream
2 parents 17d2f47 + a8bd14d commit 607bb9a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

75 files changed

+42678
-19435
lines changed

.devops/full-cuda.Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
1212
ARG CUDA_DOCKER_ARCH=all
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential python3 python3-pip git
15+
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev
1616

1717
COPY requirements.txt requirements.txt
1818
COPY requirements requirements
@@ -28,6 +28,8 @@ COPY . .
2828
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
2929
# Enable CUDA
3030
ENV LLAMA_CUDA=1
31+
# Enable cURL
32+
ENV LLAMA_CURL=1
3133

3234
RUN make
3335

.devops/full-rocm.Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1
4040
ENV CC=/opt/rocm/llvm/bin/clang
4141
ENV CXX=/opt/rocm/llvm/bin/clang++
4242

43+
# Enable cURL
44+
ENV LLAMA_CURL=1
45+
RUN apt-get update && \
46+
apt-get install -y libcurl4-openssl-dev
47+
4348
RUN make
4449

4550
ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/full.Dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION as build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential python3 python3-pip git
6+
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev
77

88
COPY requirements.txt requirements.txt
99
COPY requirements requirements
@@ -15,6 +15,9 @@ WORKDIR /app
1515

1616
COPY . .
1717

18+
ENV LLAMA_CURL=1
19+
20+
1821
RUN make
1922

2023
ENV LC_ALL=C.utf8

.devops/llama-cpp-clblast.srpm.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SRPM for building from source and packaging an RPM for RPM-based distros.
2-
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
2+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
33
# Built and maintained by John Boero - [email protected]
44
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
55

.devops/llama-cpp-cuda.srpm.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SRPM for building from source and packaging an RPM for RPM-based distros.
2-
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
2+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
33
# Built and maintained by John Boero - [email protected]
44
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
55

.devops/llama-cpp.srpm.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SRPM for building from source and packaging an RPM for RPM-based distros.
2-
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
2+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
33
# Built and maintained by John Boero - [email protected]
44
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
55

.devops/server-cuda.Dockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
1212
ARG CUDA_DOCKER_ARCH=all
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential git
15+
apt-get install -y build-essential git libcurl4-openssl-dev
1616

1717
WORKDIR /app
1818

@@ -22,11 +22,16 @@ COPY . .
2222
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
2323
# Enable CUDA
2424
ENV LLAMA_CUDA=1
25+
# Enable cURL
26+
ENV LLAMA_CURL=1
2527

2628
RUN make
2729

2830
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
2931

32+
RUN apt-get update && \
33+
apt-get install -y libcurl4-openssl-dev
34+
3035
COPY --from=build /app/server /server
3136

3237
ENTRYPOINT [ "/server" ]

.devops/server-intel.Dockerfile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
44

55
ARG LLAMA_SYCL_F16=OFF
66
RUN apt-get update && \
7-
apt-get install -y git
7+
apt-get install -y git libcurl4-openssl-dev
88

99
WORKDIR /app
1010

@@ -16,11 +16,14 @@ RUN mkdir build && \
1616
echo "LLAMA_SYCL_F16 is set" && \
1717
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
1818
fi && \
19-
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
19+
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
2020
cmake --build . --config Release --target server
2121

2222
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
2323

24+
RUN apt-get update && \
25+
apt-get install -y libcurl4-openssl-dev
26+
2427
COPY --from=build /app/build/bin/server /server
2528

2629
ENV LC_ALL=C.utf8

.devops/server-rocm.Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1
4040
ENV CC=/opt/rocm/llvm/bin/clang
4141
ENV CXX=/opt/rocm/llvm/bin/clang++
4242

43+
# Enable cURL
44+
ENV LLAMA_CURL=1
45+
RUN apt-get update && \
46+
apt-get install -y libcurl4-openssl-dev
47+
4348
RUN make
4449

4550
ENTRYPOINT [ "/app/server" ]

.devops/server-vulkan.Dockerfile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,16 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
1111
apt update -y && \
1212
apt-get install -y vulkan-sdk
1313

14+
# Install cURL
15+
RUN apt-get update && \
16+
apt-get install -y libcurl4-openssl-dev
17+
1418
# Build it
1519
WORKDIR /app
1620
COPY . .
1721
RUN mkdir build && \
1822
cd build && \
19-
cmake .. -DLLAMA_VULKAN=1 && \
23+
cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
2024
cmake --build . --config Release --target server
2125

2226
# Clean up

.devops/server.Dockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,21 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION as build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential git
6+
apt-get install -y build-essential git libcurl4-openssl-dev
77

88
WORKDIR /app
99

1010
COPY . .
1111

12+
ENV LLAMA_CURL=1
13+
1214
RUN make
1315

1416
FROM ubuntu:$UBUNTU_VERSION as runtime
1517

18+
RUN apt-get update && \
19+
apt-get install -y libcurl4-openssl-dev
20+
1621
COPY --from=build /app/server /server
1722

1823
ENV LC_ALL=C.utf8

.github/workflows/bench.yml

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ on:
2424
push:
2525
branches:
2626
- master
27-
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
28-
pull_request:
27+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
28+
pull_request_target:
2929
types: [opened, synchronize, reopened]
30-
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
30+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
3131
schedule:
3232
- cron: '04 2 * * *'
3333

3434
concurrency:
35-
group: ${{ github.workflow }}-${{ github.ref }}
35+
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}-${{ github.event.inputs.sha }}
3636
cancel-in-progress: true
3737

3838
jobs:
@@ -42,11 +42,21 @@ jobs:
4242
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
4343
N_USERS: 8
4444
DURATION: 10m
45+
46+
strategy:
47+
matrix:
48+
model: [phi-2]
49+
ftype: [q4_0, q8_0, f16]
50+
include:
51+
- model: phi-2
52+
ftype: q4_0
53+
pr_comment_enabled: "true"
54+
4555
if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }}
4656
steps:
4757
- name: Clone
4858
id: checkout
49-
uses: actions/checkout@v3
59+
uses: actions/checkout@v4
5060
with:
5161
fetch-depth: 0
5262
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
@@ -116,7 +126,7 @@ jobs:
116126
--scenario script.js \
117127
--duration ${{ github.event.inputs.duration || env.DURATION }} \
118128
--hf-repo ggml-org/models \
119-
--hf-file phi-2/ggml-model-q4_0.gguf \
129+
--hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
120130
--model-path-prefix /models \
121131
--parallel ${{ env.N_USERS }} \
122132
-ngl 33 \
@@ -134,7 +144,7 @@ jobs:
134144
135145
- uses: actions/upload-artifact@v4
136146
with:
137-
name: benchmark-results
147+
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
138148
compression-level: 9
139149
path: |
140150
examples/server/bench/*.jpg
@@ -143,11 +153,10 @@ jobs:
143153
144154
- name: Commit status
145155
uses: Sibz/github-status-action@v1
146-
continue-on-error: true # If not authorized on external repo
147156
with:
148157
authToken: ${{secrets.GITHUB_TOKEN}}
149158
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
150-
context: bench-server-baseline
159+
context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
151160
description: |
152161
${{ env.BENCH_RESULTS }}
153162
state: 'success'
@@ -204,21 +213,26 @@ jobs:
204213
- name: Comment PR
205214
uses: mshick/add-pr-comment@v2
206215
id: comment_pr
207-
if: ${{ github.event.pull_request != '' }}
216+
if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
208217
with:
209-
message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
218+
message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
210219
message: |
211-
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
220+
<p align="center">
221+
222+
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
223+
224+
</p>
225+
226+
<details>
227+
228+
<summary>Expand details for performance related PR only</summary>
212229
213230
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
214231
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
215232
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
216233
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
217234
- ${{ env.BENCH_GRAPH_XLABEL }}
218235
219-
<details>
220-
221-
<summary>Time series</summary>
222236
223237
<p align="center">
224238

0 commit comments

Comments
 (0)