
Commit 065e899
Merge branch 'main' into patch-10
2 parents f7a6d52 + 2fc98f7

22 files changed (+521 / -100 lines)

.ci/scripts/run-docs

Lines changed: 17 additions & 0 deletions
@@ -125,3 +125,20 @@ if [ "$1" == "native" ]; then
   bash -x ./run-native.sh
   echo "::endgroup::"
 fi
+
+if [ "$1" == "distributed" ]; then
+
+  echo "::group::Create script to run distributed"
+  python3 torchchat/utils/scripts/updown.py --file docs/distributed.md > ./run-distributed.sh
+  # for good measure, if something happened to updown processor,
+  # and it did not error out, fail with an exit 1
+  echo "exit 1" >> ./run-distributed.sh
+  echo "::endgroup::"
+
+  echo "::group::Run distributed"
+  echo "*******************************************"
+  cat ./run-distributed.sh
+  echo "*******************************************"
+  bash -x ./run-distributed.sh
+  echo "::endgroup::"
+fi
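The new `distributed` branch mirrors the existing `native` mode: updown.py expands docs/distributed.md into a shell script, the script is echoed into the CI log, then executed with `bash -x`. The appended `exit 1` is a guard: it only runs if the generated script never reached an exit of its own (presumably updown.py normally emits one), so a silently truncated expansion fails the job instead of passing vacuously. A sketch of reproducing the CI path locally, assuming a torchchat checkout with `docs/distributed.md` present:

```bash
# Generate the script exactly as CI does, inspect it, then run it.
python3 torchchat/utils/scripts/updown.py --file docs/distributed.md > ./run-distributed.sh
cat ./run-distributed.sh      # review what the doc expands to
bash -x ./run-distributed.sh  # or drive the whole path: bash .ci/scripts/run-docs distributed
```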

.github/workflows/more-tests.yml

Lines changed: 3 additions & 0 deletions
@@ -9,6 +9,9 @@ on:

 jobs:
   test-cuda:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
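This same three-line `permissions` stanza recurs in every workflow touched below (periodic.yml, pull.yml, run-readme-periodic.yml, run-readme-pr.yml, runner-cuda-dtype.yml). Reassembled from the hunk above, each affected job now reads roughly as follows; `id-token: write` lets the called reusable workflow request a GitHub OIDC token (which linux_job_v2.yml presumably exchanges for short-lived cloud credentials), while `contents: read` keeps checkout access:

```yaml
jobs:
  test-cuda:
    permissions:
      id-token: write   # allow the reusable workflow to mint an OIDC token
      contents: read    # read-only repository access for checkout
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
```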

.github/workflows/periodic.yml

Lines changed: 3 additions & 0 deletions
@@ -108,6 +108,9 @@ jobs:
           set -eux
           PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "periodic" --backend "gpu"
   test-gpu:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     name: test-gpu (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu

.github/workflows/pull.yml

Lines changed: 18 additions & 0 deletions
@@ -215,6 +215,9 @@ jobs:
           set -eux
           PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "gpu"
   test-gpu-compile:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     name: test-gpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
@@ -250,6 +253,9 @@
           echo "::endgroup::"

   test-gpu-aoti-bfloat16:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
@@ -286,6 +292,9 @@
           echo "::endgroup::"

   test-gpu-aoti-float32:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
@@ -327,6 +336,9 @@
           echo "::endgroup::"

   test-gpu-aoti-float16:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
@@ -369,6 +381,9 @@
           echo "::endgroup::"

   test-gpu-eval-sanity-check:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
@@ -1011,6 +1026,9 @@
           echo "Tests complete."

   test-build-runner-et-android:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.4xlarge

.github/workflows/run-readme-periodic.yml

Lines changed: 9 additions & 0 deletions
@@ -10,6 +10,9 @@ on:

 jobs:
   test-readme:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     secrets: inherit
     with:
@@ -39,6 +42,9 @@


   test-quantization-any:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -66,6 +72,9 @@
           echo "::endgroup::"

   test-gguf-any:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     secrets: inherit
     with:

.github/workflows/run-readme-pr-mps.yml

Lines changed: 4 additions & 4 deletions
@@ -10,7 +10,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-14
-      timeout-minutes: 50
+      timeout: 50
       script: |
         conda create -y -n test-readme-mps-macos python=3.10.11 llvm-openmp
         conda activate test-readme-mps-macos
@@ -36,7 +36,7 @@
   test-quantization-mps-macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
-      runner: macos-m1-14
+      runner: macos-m1-14
       script: |
         set -x
         conda create -y -n test-quantization-mps-macos python=3.10.11
@@ -63,7 +63,7 @@
   test-gguf-mps-macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
-      runner: macos-m1-14 # neeps MPS, was macos-m1-stable
+      runner: macos-m1-14 # needs MPS, was macos-m1-stable
       script: |
         set -x
         conda create -y -n test-quantization-mps-macos python=3.10.11
@@ -90,7 +90,7 @@
   test-advanced-mps-macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
-      runner: macos-m1-14 # neeps MPS, was macos-m1-stable
+      runner: macos-m1-14 # needs MPS, was macos-m1-stable
       script: |
         set -x
         conda create -y -n test-quantization-mps-macos python=3.10.11
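Three small fixes here: the `neeps` → `needs` comment typo (twice), a whitespace-only change to one `runner:` line, and the `timeout-minutes:` → `timeout:` rename. The rename matters because these keys sit under `with:`, i.e. they are inputs to the reusable macos_job.yml workflow; assuming that workflow declares a `timeout` input (as the test-infra callers elsewhere suggest), `timeout-minutes` is only meaningful as a job-level key and would be rejected as an unknown input here. The corrected stanza, reassembled from the first hunk:

```yaml
  test-readme-mps-macos:
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-m1-14
      timeout: 50   # workflow input; `timeout-minutes` is a job-level key, not an input
      script: |
        conda create -y -n test-readme-mps-macos python=3.10.11 llvm-openmp
        conda activate test-readme-mps-macos
```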

.github/workflows/run-readme-pr.yml

Lines changed: 37 additions & 1 deletion
@@ -9,6 +9,9 @@ on:

 jobs:
   test-readme-any:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -28,6 +31,9 @@
           echo "::endgroup::"

   test-readme-cpu:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -47,6 +53,9 @@
           echo "::endgroup::"

   test-quantization-any:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -66,6 +75,9 @@
           echo "::endgroup::"

   test-quantization-cpu:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -80,6 +92,9 @@
           TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization

   test-gguf-any:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -99,6 +114,9 @@
           echo "::endgroup::"

   test-gguf-cpu:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -119,6 +137,9 @@


   test-advanced-any:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -139,6 +160,9 @@


   test-advanced-cpu:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -158,6 +182,9 @@
           echo "::endgroup::"

   test-evaluation-any:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -177,6 +204,9 @@
           echo "::endgroup::"

   test-evaluation-cpu:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -196,6 +226,9 @@
           echo "::endgroup::"

   test-multimodal-any:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -215,6 +248,9 @@
           echo "::endgroup::"

   test-multimodal-cpu:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -269,4 +305,4 @@
           export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
           echo "::endgroup::"

-          TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native
+          TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native

.github/workflows/runner-cuda-dtype.yml

Lines changed: 4 additions & 1 deletion
@@ -9,6 +9,9 @@ on:

 jobs:
   test-runner-aot-cuda:
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -52,7 +55,7 @@

           python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --output-aoti-package-path /tmp/model.pt2

-          ./cmake-out/aoti_run /tmp/model.pt2 -d CUDA -z ${MODEL_DIR}/tokenizer.model -i "${PROMPT}"
+          ./cmake-out/aoti_run /tmp/model.pt2 -z ${MODEL_DIR}/tokenizer.model -i "${PROMPT}"

         done
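Beyond the `permissions` stanza, this workflow drops the `-d CUDA` flag from the `aoti_run` invocation. A minimal sketch of the resulting export-then-run flow, assuming `aoti_run` was built via `torchchat/utils/scripts/build_native.sh aoti` and now picks the device up from the `.pt2` package itself rather than a command-line flag:

```bash
# Export: device/dtype choices are baked into the AOTI package at export time.
python torchchat.py export --checkpoint-path "${MODEL_DIR}/stories15M.pt" \
    --output-aoti-package-path /tmp/model.pt2

# Run: no -d flag; the package already records how it was compiled.
./cmake-out/aoti_run /tmp/model.pt2 -z "${MODEL_DIR}/tokenizer.model" -i "${PROMPT}"
```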

README.md

Lines changed: 8 additions & 1 deletion
@@ -69,6 +69,13 @@ aliases.
 |[tinyllamas/stories42M](https://huggingface.co/karpathy/tinyllamas/tree/main)||Toy model for `generate`. Alias to `stories42M`.|
 |[tinyllamas/stories110M](https://huggingface.co/karpathy/tinyllamas/tree/main)||Toy model for `generate`. Alias to `stories110M`.|
 |[openlm-research/open_llama_7b](https://huggingface.co/openlm-research/open_llama_7b)||Best for `generate`. Alias to `open-llama`.|
+| [ibm-granite/granite-3b-code-instruct-128k](https://huggingface.co/ibm-granite/granite-3b-code-instruct-128k) || Alias to `granite-code` and `granite-code-3b`.|
+| [ibm-granite/granite-8b-code-instruct-128k](https://huggingface.co/ibm-granite/granite-8b-code-instruct-128k) || Alias to `granite-code-8b`.|
+| [ibm-granite/granite-3.0-2b-instruct](https://huggingface.co/ibm-granite/granite-3.0-2b-instruct) || Alias to `granite3-2b` and `granite3`.|
+| [ibm-granite/granite-3.0-8b-instruct](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct) || Alias to `granite3-8b`.|
+| [ibm-granite/granite-3.1-2b-instruct](https://huggingface.co/ibm-granite/granite-3.1-2b-instruct) || Alias to `granite3.1-2b` and `granite3.1`.|
+| [ibm-granite/granite-3.1-8b-instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct) || Alias to `granite3.1-8b`.|
+

 ## Installation
 The following steps require that you have [Python 3.10](https://www.python.org/downloads/release/python-3100/) installed.
@@ -334,7 +341,7 @@ torchchat/utils/scripts/build_native.sh aoti

 Then run the compiled executable, with the pt2.
 ```bash
-cmake-out/aoti_run exportedModels/llama3_1_artifacts.pt2 -z `python3 torchchat.py where llama3.1`/tokenizer.model -l 3 -i "Once upon a time"
+cmake-out/aoti_run exportedModels/llama3_1_artifacts.pt2 -z `python3 torchchat.py where llama3.1`/tokenizer.model -i "Once upon a time"
 ```

 ## Mobile Execution
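Two README changes: the model table gains the IBM Granite entries with their download aliases, and the `aoti_run` example drops the `-l 3` flag (a model/tokenizer-version hint, presumably now inferred), matching the runner change above. With the aliases registered, a Granite model can be exercised like any other table entry; a sketch, assuming the usual Hugging Face login described in the README has been done:

```bash
# Download by alias, then generate. Per the table above, `granite3.1`
# resolves to ibm-granite/granite-3.1-2b-instruct.
python3 torchchat.py download granite3.1
python3 torchchat.py generate granite3.1 --prompt "Write a haiku about tensors"
```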
