File tree Expand file tree Collapse file tree 3 files changed +48
-1
lines changed Expand file tree Collapse file tree 3 files changed +48
-1
lines changed Original file line number Diff line number Diff line change 28
28
# if: startsWith(github.ref, 'refs/tags')
29
29
uses: pypa/gh-action-pypi-publish@release/v1
30
30
with:
31
- password: ${{ secrets.PYPI_API_TOKEN }}
31
+ password: ${{ secrets.PYPI_API_TOKEN }}
32
+
33
+ docker:
34
+ name: Build and push Docker image
35
+ runs-on: ubuntu-latest
36
+ needs: build-n-publish
37
+ steps:
38
+ - name: Checkout
39
+ uses: actions/checkout@v3
40
+
41
+ - name: Set up QEMU
42
+ uses: docker/setup-qemu-action@v2
43
+
44
+ - name: Set up Docker Buildx
45
+ uses: docker/setup-buildx-action@v2
46
+
47
+ - name: Login to GitHub Container Registry
48
+ uses: docker/login-action@v2
49
+ with:
50
+ registry: ghcr.io
51
+ username: ${{ github.repository_owner }}
52
+ password: ${{ secrets.GITHUB_TOKEN }}
53
+
54
+ - name: Build and push
55
+ uses: docker/build-push-action@v4
56
+ with:
57
+ push: true # push to registry
58
+ pull: true # always fetch the latest base images
59
+ platforms: linux/amd64,linux/arm64 # build for both amd64 and arm64
60
+ tags: ghcr.io/abetlen/llama-cpp-python:latest
Original file line number Diff line number Diff line change
1
+ FROM python:3-buster
2
+
3
+ # We need to set the host to 0.0.0.0 to allow outside access
4
+ ENV HOST 0.0.0.0
5
+
6
+ # Install the package
7
+ RUN pip install llama-cpp-python[server]
8
+
9
+ # Run the server
10
+ CMD python3 -m llama_cpp.server
Original file line number Diff line number Diff line change @@ -66,6 +66,14 @@ python3 -m llama_cpp.server
66
66
67
67
Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the OpenAPI documentation.
68
68
69
+ ## Docker image
70
+
71
+ A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server:
72
+
73
+ ```bash
74
+ docker run --rm -it -p8000:8000 -v /path/to/models:/models -eMODEL=/models/ggml-vicuna-7b-4bit.bin ghcr.io/abetlen/llama-cpp-python:latest
75
+ ```
76
+
69
77
## Low-level API
70
78
71
79
The low-level API is a direct `ctypes` binding to the C API provided by `llama.cpp`.
You can’t perform that action at this time.
0 commit comments