42
42
# (matrix.host-platform == 'win-64' && 'windows-amd64-cpu8') }}
43
43
outputs :
44
44
BUILD_CTK_VER : ${{ steps.pass_env.outputs.CUDA_VERSION }}
45
+ defaults :
46
+ run :
47
+ shell : bash --noprofile --norc -xeuo pipefail {0}
45
48
steps :
46
49
- name : Checkout ${{ github.event.repository.name }}
47
50
uses : actions/checkout@v4
62
65
uses : ilammy/msvc-dev-cmd@v1
63
66
64
67
- name : Set environment variables
65
- shell : bash --noprofile --norc -xeuo pipefail {0}
66
68
run : |
67
69
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
68
70
if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -75,14 +77,60 @@ jobs:
75
77
fi
76
78
77
79
echo "PARALLEL_LEVEL=$(nproc)" >> $GITHUB_ENV
78
- echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
80
+ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
81
+ echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
82
+ echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
79
83
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
80
- echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
84
+ CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
85
+ echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
86
+ echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
81
87
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
82
88
echo "CIBW_BUILD=${CIBW_BUILD}" >> $GITHUB_ENV
83
-
89
+
90
# When CI runs on main (i.e. after a merge), populate GHA Cache instead of
# GHA Artifacts so that CI runs on other branches have a fallback to use.
# The branch name is read from the runner-provided GITHUB_REF_NAME variable
# instead of being interpolated into the script text through the
# github.ref_name expression: ref names are user-controlled, and an
# interpolated value would be parsed by bash as part of the script.
if [[ "${GITHUB_REF_NAME}" == main ]]; then
  echo "USE_CACHE=1" >> "$GITHUB_ENV"
else
  echo "USE_CACHE=0" >> "$GITHUB_ENV"
fi
100
+
101
- name: Install dependencies
  # Extra tooling is only installed on the GHA Cache path.
  if: ${{ env.USE_CACHE == '1' }}
  run: |
    # For GHA Cache.
    # `dependencies` are the package names passed to apt; `dependent_exes`
    # are the executables whose presence is probed below.
    dependencies=(zstd)
    dependent_exes=(zstd)

    # Probe every required executable by its own name. Redirect stdout
    # first and then stderr so that both streams are silenced.
    not_found=0
    for dep in "${dependent_exes[@]}"; do
      if ! command -v "$dep" >/dev/null 2>&1; then
        not_found=1
        break
      fi
    done
    if [[ $not_found == 0 ]]; then
      echo "All dependencies are found. Do nothing."
      exit 0
    fi

    # Pick how to obtain root: through sudo when it exists, directly when
    # the script already runs as root, otherwise give up.
    if command -v sudo >/dev/null 2>&1; then
      as_root() { sudo "$@"; }
    elif [[ $EUID == 0 ]]; then
      as_root() { "$@"; }
    else
      echo "The following operations require root access."
      exit 1
    fi

    # NOTE(review): apt is assumed to exist on every runner that reaches
    # this point; confirm for the non-Linux entries of the build matrix.
    as_root apt update
    as_root apt install -y "${dependencies[@]}"
132
+
84
133
- name: Dump environment
  # Print every environment variable visible to the step; the shell comes
  # from the job-level `defaults.run.shell`.
  run: env
88
136
97
145
output-dir : ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
98
146
99
147
- name : List the cuda.core artifacts directory
100
- shell : bash --noprofile --norc -xeuo pipefail {0}
101
148
run : |
102
149
if [[ "${{ matrix.host-platform }}" == win* ]]; then
103
150
export CHOWN=chown
@@ -108,19 +155,42 @@ jobs:
108
155
ls -lahR ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
109
156
110
157
- name: Check cuda.core wheel
  # Install twine and run `twine check` over every wheel in the cuda.core
  # artifacts directory; the shell comes from `defaults.run.shell`.
  run: |
    pip install twine
    twine check ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
115
161
116
162
- name: Upload cuda.core build artifacts
  uses: actions/upload-artifact@v4
  # Artifacts are published only when the GHA Cache path is not selected.
  if: ${{ env.USE_CACHE == '0' }}
  with:
    # Every wheel in the cuda.core dist directory, under the per-commit name.
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    # Fail the step when the glob matches nothing.
    if-no-files-found: error
    overwrite: ' true'
123
170
171
- name: Prepare cuda.core cache
  # The step-level condition already restricts this to the GHA Cache path,
  # so the script does not test USE_CACHE again.
  if: ${{ env.USE_CACHE == '1' }}
  env:
    # Token for the gh CLI calls below.
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # This file is uploaded to GHA Cache.
    # NOTE: tar is invoked without a compression flag, so despite the
    # .tar.gz suffix the archive is a plain tar file. The name is kept
    # because the cache save and restore steps refer to this exact path.
    CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
    tar -c -f "$CACHE_ARCHIVE" -C "${{ env.CUDA_CORE_ARTIFACTS_DIR }}" .
    du -h "$CACHE_ARCHIVE"

    # Check if previous runs have already populated the cache under this
    # key; if so, clean it up before saving.
    #  - The listing is captured first and searched afterwards, so an early
    #    grep exit cannot break a pipeline under `pipefail`.
    #  - `|| true` keeps the lookup best-effort: a failed listing means no
    #    cleanup, not a failed build.
    #  - The key is matched as a fixed string, and cleanup runs for one OR
    #    MORE matching entries (the same key can exist more than once).
    CACHE_KEY="${{ env.CUDA_CORE_ARTIFACT_NAME }}"
    EXISTING_CACHES="$(gh cache list --limit 100 || true)"
    if grep -qF -- "$CACHE_KEY" <<< "$EXISTING_CACHES"; then
      gh cache delete "$CACHE_KEY"
    fi
186
+
187
- name: Cache cuda.core build artifacts
  uses: actions/cache/save@v4
  # Only on the GHA Cache path.
  if: ${{ env.USE_CACHE == '1' }}
  with:
    # The archive is saved under the per-commit artifact name.
    path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
    key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
193
+
124
194
- name : Set up mini CTK
125
195
uses : ./.github/actions/fetch_ctk
126
196
continue-on-error : false
@@ -146,7 +216,6 @@ jobs:
146
216
output-dir : ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
147
217
148
218
- name : List the cuda.bindings artifacts directory
149
- shell : bash --noprofile --norc -xeuo pipefail {0}
150
219
run : |
151
220
if [[ "${{ matrix.host-platform }}" == win* ]]; then
152
221
export CHOWN=chown
@@ -158,18 +227,41 @@ jobs:
158
227
159
228
# TODO: enable this after NVIDIA/cuda-python#297 is resolved
160
229
# - name: Check cuda.bindings wheel
161
- # shell: bash --noprofile --norc -xeuo pipefail {0}
162
230
# run: |
163
231
# twine check ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
164
232
233
- name: Prepare cuda.bindings cache
  # The step-level condition already restricts this to the GHA Cache path,
  # so the script does not test USE_CACHE again.
  if: ${{ env.USE_CACHE == '1' }}
  env:
    # Token for the gh CLI calls below.
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # This file is uploaded to GHA Cache.
    # NOTE: tar is invoked without a compression flag, so despite the
    # .tar.gz suffix the archive is a plain tar file. The name is kept
    # because the cache save and restore steps refer to this exact path.
    CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
    tar -c -f "$CACHE_ARCHIVE" -C "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" .
    du -h "$CACHE_ARCHIVE"

    # Check if previous runs have already populated the cache under this
    # key; if so, clean it up before saving.
    #  - The listing is captured first and searched afterwards, so an early
    #    grep exit cannot break a pipeline under `pipefail`.
    #  - `|| true` keeps the lookup best-effort: a failed listing means no
    #    cleanup, not a failed build.
    #  - The key is matched as a fixed string, and cleanup runs for one OR
    #    MORE matching entries (the same key can exist more than once).
    CACHE_KEY="${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}"
    EXISTING_CACHES="$(gh cache list --limit 100 || true)"
    if grep -qF -- "$CACHE_KEY" <<< "$EXISTING_CACHES"; then
      gh cache delete "$CACHE_KEY"
    fi
248
+
165
249
- name: Upload cuda.bindings build artifacts
  uses: actions/upload-artifact@v4
  # Artifacts are published only when the GHA Cache path is not selected.
  if: ${{ env.USE_CACHE == '0' }}
  with:
    # Every wheel in the cuda.bindings dist directory, under the per-commit name.
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    # Fail the step when the glob matches nothing.
    if-no-files-found: error
    overwrite: ' true'
172
257
258
- name: Cache cuda.bindings build artifacts
  uses: actions/cache/save@v4
  # Only on the GHA Cache path.
  if: ${{ env.USE_CACHE == '1' }}
  with:
    # The archive is saved under the per-commit artifact name.
    path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
    key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
264
+
173
265
- name : Pass environment variables to the next runner
174
266
id : pass_env
175
267
run : |
@@ -205,7 +297,7 @@ jobs:
205
297
runner : H100
206
298
name : Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }})
207
299
# The build stage could fail but we want the CI to keep moving.
208
- if : ${{ github.repository_owner == 'nvidia' && always () }}
300
+ if : ${{ github.repository_owner == 'nvidia' && !cancelled () }}
209
301
permissions :
210
302
id-token : write # This is required for configure-aws-credentials
211
303
contents : read # This is required for actions/checkout
@@ -221,9 +313,11 @@ jobs:
221
313
NVIDIA_VISIBLE_DEVICES : ${{ env.NVIDIA_VISIBLE_DEVICES }}
222
314
needs :
223
315
- build
316
+ defaults :
317
+ run :
318
+ shell : bash --noprofile --norc -xeuo pipefail {0}
224
319
steps :
225
320
- name: Ensure GPU is working
  # Runs nvidia-smi as the first step of the job; the shell comes from the
  # job-level `defaults.run.shell`.
  run: |
    nvidia-smi
228
322
229
323
- name : Checkout ${{ github.event.repository.name }}
@@ -232,7 +326,6 @@ jobs:
232
326
fetch-depth : 0
233
327
234
328
- name : Set environment variables
235
- shell : bash --noprofile --norc -xeuo pipefail {0}
236
329
run : |
237
330
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
238
331
if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -251,32 +344,83 @@ jobs:
251
344
fi
252
345
253
346
# make outputs from the previous job as env vars
254
- echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
347
+ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
348
+ echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
349
+ echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
255
350
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
256
- echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
351
# The cuda.bindings artifact/cache name embeds the CTK version the wheel was
# built with. Take it from the build job's BUILD_CTK_VER output rather than
# from this job's matrix.cuda-version, so the name matches what the build job
# published even if the two versions differ.
# NOTE(review): the test matrix is not visible here; confirm whether its
# cuda-version can differ from the build job's.
CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}"
echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
257
354
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
258
355
echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV
259
356
357
+ # We'll try GHA Artifacts first, and then fall back to GHA Cache
260
358
- name: Download cuda.bindings build artifacts
  id: cuda-bindings-download
  # A missing artifact is expected when the build job populated GHA Cache
  # instead. Record the failure without failing the job, so that the cache
  # fallback and every later step (which implicitly require success()) can
  # still run.
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

- name: Restore cuda.bindings cache
  # `outcome` is the step result before continue-on-error is applied, so it
  # still reads 'failure' when the artifact download did not succeed.
  if: ${{ steps.cuda-bindings-download.outcome == 'failure' }}
  id: cuda-bindings-cache
  uses: actions/cache/restore@v4
  with:
    key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
    # Fall back to any cache whose key starts with the base name.
    restore-keys: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}
    # With neither an artifact nor a cache there is nothing to test.
    fail-on-cache-miss: true

- name: Report cache restore status (hit)
  # `cache-hit` is 'true' only for an exact match on `key`; a restore through
  # `restore-keys` must be unpacked too, so test `cache-matched-key`, which is
  # set for either kind of match and empty when the restore step was skipped.
  if: ${{ steps.cuda-bindings-cache.outputs.cache-matched-key != '' }}
  run: |
    echo "cache is found"
    CACHE_DIR="${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
    CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
    ls -l "$CACHE_ARCHIVE"
    mkdir -p "$CACHE_DIR"
    # Unpack the restored archive into the artifacts directory, then drop it.
    du -h "$CACHE_ARCHIVE" &&
    tar -x -f "$CACHE_ARCHIVE" -C "$CACHE_DIR" &&
    rm -f "$CACHE_ARCHIVE" || echo "WARNING: cache could not be retrieved."
387
+
266
388
- name: Display structure of downloaded cuda.bindings artifacts
  # Print the working directory, then list the cuda.bindings artifacts
  # directory recursively; the shell comes from `defaults.run.shell`.
  run: |
    pwd
    ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
271
392
272
393
- name: Download cuda.core build artifacts
  id: cuda-core-download
  # A missing artifact is expected when the build job populated GHA Cache
  # instead. Record the failure without failing the job, so that the cache
  # fallback and every later step (which implicitly require success()) can
  # still run.
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

- name: Restore cuda.core cache
  # `outcome` is the step result before continue-on-error is applied, so it
  # still reads 'failure' when the artifact download did not succeed.
  if: ${{ steps.cuda-core-download.outcome == 'failure' }}
  id: cuda-core-cache
  uses: actions/cache/restore@v4
  with:
    key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
    # Fall back to any cache whose key starts with the base name.
    restore-keys: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}
    # With neither an artifact nor a cache there is nothing to test.
    fail-on-cache-miss: true

- name: Report cache restore status (hit)
  # `cache-hit` is 'true' only for an exact match on `key`; a restore through
  # `restore-keys` must be unpacked too, so test `cache-matched-key`, which is
  # set for either kind of match and empty when the restore step was skipped.
  if: ${{ steps.cuda-core-cache.outputs.cache-matched-key != '' }}
  run: |
    echo "cache is found"
    CACHE_DIR="${{ env.CUDA_CORE_ARTIFACTS_DIR }}"
    CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
    ls -l "$CACHE_ARCHIVE"
    mkdir -p "$CACHE_DIR"
    # Unpack the restored archive into the artifacts directory, then drop it.
    du -h "$CACHE_ARCHIVE" &&
    tar -x -f "$CACHE_ARCHIVE" -C "$CACHE_DIR" &&
    rm -f "$CACHE_ARCHIVE" || echo "WARNING: cache could not be retrieved."
422
+
278
423
- name : Display structure of downloaded cuda.core build artifacts
279
- shell : bash --noprofile --norc -xeuo pipefail {0}
280
424
run : |
281
425
pwd
282
426
ls -lahR $CUDA_CORE_ARTIFACTS_DIR
@@ -298,7 +442,6 @@ jobs:
298
442
299
443
- name : Run cuda.bindings tests
300
444
if : ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
301
- shell : bash --noprofile --norc -xeuo pipefail {0}
302
445
run : |
303
446
ls $CUDA_PATH
304
447
@@ -321,7 +464,6 @@ jobs:
321
464
popd
322
465
323
466
- name : Run cuda.core tests
324
- shell : bash --noprofile --norc -xeuo pipefail {0}
325
467
run : |
326
468
if [[ ${{ matrix.python-version }} == "3.13" ]]; then
327
469
# TODO: remove this hack once cuda-python has a cp313 build
@@ -346,7 +488,7 @@ jobs:
346
488
doc :
347
489
name : Docs
348
490
# The build stage could fail but we want the CI to keep moving.
349
- if : ${{ github.repository_owner == 'nvidia' && always () }}
491
+ if : ${{ github.repository_owner == 'nvidia' && !cancelled () }}
350
492
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
351
493
permissions :
352
494
id-token : write
0 commit comments