File tree Expand file tree Collapse file tree 7 files changed +8
-8
lines changed Expand file tree Collapse file tree 7 files changed +8
-8
lines changed Original file line number Diff line number Diff line change @@ -20,7 +20,7 @@ default-members = [
20
20
resolver = "2"
21
21
22
22
[workspace .package ]
23
- version = "2.2.1-dev0"
23
+ version = "2.3.1-dev0"
24
24
edition = "2021"
25
25
authors = ["Olivier Dehaene"]
26
26
homepage = "https://github.com/huggingface/text-generation-inference"
Original file line number Diff line number Diff line change @@ -83,7 +83,7 @@ model=HuggingFaceH4/zephyr-7b-beta
83
83
volume=$PWD/data
84
84
85
85
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
86
- ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id $model
86
+ ghcr.io/huggingface/text-generation-inference:2.3.0 --model-id $model
87
87
```
88
88
89
89
And then you can make requests like
Original file line number Diff line number Diff line change 10
10
"name" : " Apache 2.0" ,
11
11
"url" : " https://www.apache.org/licenses/LICENSE-2.0"
12
12
},
13
- "version" : " 2.2 .1-dev0"
13
+ "version" : " 2.3 .1-dev0"
14
14
},
15
15
"paths" : {
16
16
"/" : {
Original file line number Diff line number Diff line change @@ -11,7 +11,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
11
11
docker run --rm -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
12
12
--device=/dev/kfd --device=/dev/dri --group-add video \
13
13
--ipc=host --shm-size 256g --net host -v $volume:/data \
14
- ghcr.io/huggingface/text-generation-inference:2.2.0-rocm \
14
+ ghcr.io/huggingface/text-generation-inference:2.3.0-rocm \
15
15
--model-id $model
16
16
```
17
17
Original file line number Diff line number Diff line change @@ -12,7 +12,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
12
12
docker run --rm --privileged --cap-add=sys_nice \
13
13
--device=/dev/dri \
14
14
--ipc=host --shm-size 1g --net host -v $volume:/data \
15
- ghcr.io/huggingface/text-generation-inference:2.2.0-intel-xpu \
15
+ ghcr.io/huggingface/text-generation-inference:2.3.0-intel-xpu \
16
16
--model-id $model --cuda-graphs 0
17
17
```
18
18
@@ -29,7 +29,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
29
29
docker run --rm --privileged --cap-add=sys_nice \
30
30
--device=/dev/dri \
31
31
--ipc=host --shm-size 1g --net host -v $volume:/data \
32
- ghcr.io/huggingface/text-generation-inference:2.2.0-intel-cpu \
32
+ ghcr.io/huggingface/text-generation-inference:2.3.0-intel-cpu \
33
33
--model-id $model --cuda-graphs 0
34
34
```
35
35
Original file line number Diff line number Diff line change @@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B
11
11
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
12
12
13
13
docker run --gpus all --shm-size 64g -p 8080:80 -v $volume:/data \
14
- ghcr.io/huggingface/text-generation-inference:2.2.0 \
14
+ ghcr.io/huggingface/text-generation-inference:2.3.0 \
15
15
--model-id $model
16
16
```
17
17
Original file line number Diff line number Diff line change @@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B
11
11
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
12
12
13
13
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
14
- ghcr.io/huggingface/text-generation-inference:2.2.0 \
14
+ ghcr.io/huggingface/text-generation-inference:2.3.0 \
15
15
--model-id $model
16
16
```
17
17
You can’t perform that action at this time.
0 commit comments