42
42
# (matrix.host-platform == 'win-64' && 'windows-amd64-cpu8') }}
43
43
outputs :
44
44
BUILD_CTK_VER : ${{ steps.pass_env.outputs.CUDA_VERSION }}
45
+ defaults :
46
+ run :
47
+ shell : bash --noprofile --norc -xeuo pipefail {0}
45
48
steps :
46
49
- name : Checkout ${{ github.event.repository.name }}
47
50
uses : actions/checkout@v4
62
65
uses : ilammy/msvc-dev-cmd@v1
63
66
64
67
- name : Set environment variables
65
- shell : bash --noprofile --norc -xeuo pipefail {0}
66
68
run : |
67
69
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
68
70
if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -75,14 +77,60 @@ jobs:
75
77
fi
76
78
77
79
echo "PARALLEL_LEVEL=$(nproc)" >> $GITHUB_ENV
78
- echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
80
+ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
81
+ echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
82
+ echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
79
83
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
80
- echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
84
+ CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
85
+ echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
86
+ echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
81
87
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
82
88
echo "CIBW_BUILD=${CIBW_BUILD}" >> $GITHUB_ENV
83
-
89
+
90
+ # When the CI is run due to merging to main, we want it to populate GHA Cache not Artifacts,
91
+ # so that CI workflows running on every branch have a fallback to use.
92
+ if [[ "${{ github.ref_name}}" == main ]]; then
93
+ echo "USE_CACHE=1" >> $GITHUB_ENV
94
+ else
95
+ echo "USE_CACHE=0" >> $GITHUB_ENV
96
+ fi
97
+
98
+ # TODO: revert me before merging; this is to test the cache restore in the PR
99
+ echo "USE_CACHE=1" >> $GITHUB_ENV
100
+
101
- name: Install dependencies
  if: ${{ env.USE_CACHE == '1' }}
  run: |
    # Tools needed for GHA Cache.
    # `dependencies` holds the apt package names to install;
    # `dependent_exes` holds the executables whose presence proves that
    # the packages are already available.
    dependencies=(zstd)
    dependent_exes=(zstd)

    # Probe every required executable (not a hard-coded one) and stop at
    # the first one that is missing.
    missing=0
    for exe in "${dependent_exes[@]}"; do
      if ! command -v "$exe" >/dev/null 2>&1; then
        missing=1
        break
      fi
    done
    if [[ $missing == 0 ]]; then
      echo "All dependencies are found. Do nothing."
      exit 0
    fi

    # Pick how to run privileged commands: prefer sudo when it exists,
    # run directly when already root, otherwise give up.
    # A function is used instead of an alias so that no `shopt -s
    # expand_aliases` ordering subtleties are involved.
    if command -v sudo >/dev/null 2>&1; then
      as_root() { sudo "$@"; }
    elif [[ $EUID == 0 ]]; then
      as_root() { "$@"; }
    else
      echo "The following operations require root access." >&2
      exit 1
    fi

    as_root apt update
    as_root apt install -y "${dependencies[@]}"
132
+
84
133
# Debugging aid: prints every environment variable visible to the step.
# The per-step `shell` line below is shown as removed in this diff.
- name : Dump environment
85
- shell : bash --noprofile --norc -xeuo pipefail {0}
86
134
run : |
87
135
env
88
136
97
145
output-dir : ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
98
146
99
147
- name : List the cuda.core artifacts directory
100
- shell : bash --noprofile --norc -xeuo pipefail {0}
101
148
run : |
102
149
if [[ "${{ matrix.host-platform }}" == win* ]]; then
103
150
export CHOWN=chown
@@ -108,19 +155,40 @@ jobs:
108
155
ls -lahR ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
109
156
110
157
# Installs twine and runs `twine check` on every wheel found in the
# cuda.core artifacts directory.
- name : Check cuda.core wheel
111
- shell : bash --noprofile --norc -xeuo pipefail {0}
112
158
run : |
113
159
pip install twine
114
160
twine check ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
115
161
116
162
# Publishes the cuda.core wheels as a workflow artifact named
# CUDA_CORE_ARTIFACT_NAME. Only runs when USE_CACHE is '0'.
# The step errors out if no *.whl file matches (`if-no-files-found : error`).
- name : Upload cuda.core build artifacts
163
+ if : ${{ env.USE_CACHE == '0' }}
117
164
uses : actions/upload-artifact@v4
118
165
with :
119
166
name : ${{ env.CUDA_CORE_ARTIFACT_NAME }}
120
167
path : ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
121
168
if-no-files-found : error
122
169
overwrite : ' true'
123
170
171
- name: Prepare cuda.core cache
  if: ${{ env.USE_CACHE == '1' }}
  env:
    # The gh CLI refuses to run inside a workflow unless GH_TOKEN is set.
    # NOTE(review): deleting a cache entry additionally needs the job to
    # grant `actions: write`; confirm against the job's `permissions`.
    GH_TOKEN: ${{ github.token }}
  run: |
    CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
    CACHE_KEY="${{ env.CUDA_CORE_ARTIFACT_NAME }}"

    # This file is uploaded to GHA Cache.
    # NOTE: `tar -c` without -z writes an uncompressed archive; the
    # ".tar.gz" name is kept as-is because the cache save/restore steps
    # reference the file by this exact name.
    tar -c -f "$CACHE_ARCHIVE" -C "${{ env.CUDA_CORE_ARTIFACTS_DIR }}" .
    du -h "$CACHE_ARCHIVE"

    # Check if previous runs from the same PR have populated the cache;
    # if so it needs to be cleaned up before the new entry is saved.
    # The listing is captured first (instead of piping into grep) so that
    # `pipefail` cannot turn an early grep exit into a false negative, and
    # the limit is raised above gh's default so the key is not missed.
    existing_caches="$(gh cache list --limit 100 || true)"
    if grep -q -F -- "$CACHE_KEY" <<< "$existing_caches"; then
      # Cleanup is best-effort: a failure here must not fail the build.
      gh cache delete "$CACHE_KEY" \
        || echo "WARNING: could not delete stale cache entry ${CACHE_KEY}."
    fi
184
+
185
# Saves the cuda.core archive (<CUDA_CORE_ARTIFACT_BASENAME>.tar.gz) to GHA
# Cache under the key CUDA_CORE_ARTIFACT_NAME. Only runs when USE_CACHE is '1'.
+ - name : Cache cuda.core build artifacts
186
+ if : ${{ env.USE_CACHE == '1' }}
187
+ uses : actions/cache/save@v4
188
+ with :
189
+ key : ${{ env.CUDA_CORE_ARTIFACT_NAME }}
190
+ path : ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
191
+
124
192
- name : Set up mini CTK
125
193
uses : ./.github/actions/fetch_ctk
126
194
continue-on-error : false
@@ -146,7 +214,6 @@ jobs:
146
214
output-dir : ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
147
215
148
216
- name : List the cuda.bindings artifacts directory
149
- shell : bash --noprofile --norc -xeuo pipefail {0}
150
217
run : |
151
218
if [[ "${{ matrix.host-platform }}" == win* ]]; then
152
219
export CHOWN=chown
@@ -158,18 +225,39 @@ jobs:
158
225
159
226
# TODO: enable this after NVIDIA/cuda-python#297 is resolved
160
227
# - name: Check cuda.bindings wheel
161
- # shell: bash --noprofile --norc -xeuo pipefail {0}
162
228
# run: |
163
229
# twine check ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
164
230
231
- name: Prepare cuda.bindings cache
  if: ${{ env.USE_CACHE == '1' }}
  env:
    # The gh CLI refuses to run inside a workflow unless GH_TOKEN is set.
    # NOTE(review): deleting a cache entry additionally needs the job to
    # grant `actions: write`; confirm against the job's `permissions`.
    GH_TOKEN: ${{ github.token }}
  run: |
    CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
    CACHE_KEY="${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}"

    # This file is uploaded to GHA Cache.
    # NOTE: `tar -c` without -z writes an uncompressed archive; the
    # ".tar.gz" name is kept as-is because the cache save/restore steps
    # reference the file by this exact name.
    tar -c -f "$CACHE_ARCHIVE" -C "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" .
    du -h "$CACHE_ARCHIVE"

    # Check if previous runs from the same PR have populated the cache;
    # if so it needs to be cleaned up before the new entry is saved.
    # The listing is captured first (instead of piping into grep) so that
    # `pipefail` cannot turn an early grep exit into a false negative, and
    # the limit is raised above gh's default so the key is not missed.
    existing_caches="$(gh cache list --limit 100 || true)"
    if grep -q -F -- "$CACHE_KEY" <<< "$existing_caches"; then
      # Cleanup is best-effort: a failure here must not fail the build.
      gh cache delete "$CACHE_KEY" \
        || echo "WARNING: could not delete stale cache entry ${CACHE_KEY}."
    fi
244
+
165
245
# Publishes the cuda.bindings wheels as a workflow artifact named
# CUDA_BINDINGS_ARTIFACT_NAME. Only runs when USE_CACHE is '0'.
# The step errors out if no *.whl file matches (`if-no-files-found : error`).
- name : Upload cuda.bindings build artifacts
246
+ if : ${{ env.USE_CACHE == '0' }}
166
247
uses : actions/upload-artifact@v4
167
248
with :
168
249
name : ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
169
250
path : ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
170
251
if-no-files-found : error
171
252
overwrite : ' true'
172
253
254
# Saves the cuda.bindings archive (<CUDA_BINDINGS_ARTIFACT_BASENAME>.tar.gz)
# to GHA Cache under the key CUDA_BINDINGS_ARTIFACT_NAME.
# Only runs when USE_CACHE is '1'.
+ - name : Cache cuda.bindings build artifacts
255
+ if : ${{ env.USE_CACHE == '1' }}
256
+ uses : actions/cache/save@v4
257
+ with :
258
+ key : ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
259
+ path : ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
260
+
173
261
- name : Pass environment variables to the next runner
174
262
id : pass_env
175
263
run : |
@@ -205,7 +293,7 @@ jobs:
205
293
runner : H100
206
294
name : Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }})
207
295
# The build stage could fail but we want the CI to keep moving.
208
- if : ${{ github.repository_owner == 'nvidia' && always () }}
296
+ if : ${{ github.repository_owner == 'nvidia' && !cancelled () }}
209
297
permissions :
210
298
id-token : write # This is required for configure-aws-credentials
211
299
contents : read # This is required for actions/checkout
@@ -221,9 +309,11 @@ jobs:
221
309
NVIDIA_VISIBLE_DEVICES : ${{ env.NVIDIA_VISIBLE_DEVICES }}
222
310
needs :
223
311
- build
312
+ defaults :
313
+ run :
314
+ shell : bash --noprofile --norc -xeuo pipefail {0}
224
315
steps :
225
316
# Sanity check: runs `nvidia-smi` as the first step of the job.
# The per-step `shell` line below is shown as removed in this diff.
- name : Ensure GPU is working
226
- shell : bash --noprofile --norc -xeuo pipefail {0}
227
317
run : nvidia-smi
228
318
229
319
- name : Checkout ${{ github.event.repository.name }}
@@ -232,7 +322,6 @@ jobs:
232
322
fetch-depth : 0
233
323
234
324
- name : Set environment variables
235
- shell : bash --noprofile --norc -xeuo pipefail {0}
236
325
run : |
237
326
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
238
327
if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -251,32 +340,83 @@ jobs:
251
340
fi
252
341
253
342
# make outputs from the previous job as env vars
254
- echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
343
+ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
344
+ echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
345
+ echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
255
346
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
256
- echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
347
+ CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
348
+ echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
349
+ echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
257
350
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
258
351
echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV
259
352
353
+ # We'll try GHA Artifacts first, and then fall back to GHA Cache
260
354
- name: Download cuda.bindings build artifacts
  id: cuda-bindings-download
  # A missing artifact is an expected situation (the wheels may live in GHA
  # Cache instead), so it must not mark the job as failed; otherwise every
  # later step with the implicit `success()` condition would be skipped.
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

- name: Restore cuda.bindings cache
  id: cuda-bindings-cache
  # `outcome` is the raw result of the download step before
  # `continue-on-error` is applied, so this runs only when the artifact
  # download actually failed.
  if: ${{ steps.cuda-bindings-download.outcome == 'failure' }}
  uses: actions/cache/restore@v4
  with:
    key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
    # Prefix fallback: accept an archive saved for a different commit.
    restore-keys: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}
    fail-on-cache-miss: true

- name: Report cache restore status (hit)
  # `cache-hit` is 'true' only for an exact key match; a `restore-keys`
  # fallback leaves it 'false'. `cache-matched-key` is non-empty for both
  # kinds of match, so it is the right signal for "an archive was restored".
  if: ${{ steps.cuda-bindings-cache.outcome == 'success' && steps.cuda-bindings-cache.outputs.cache-matched-key != '' }}
  run: |
    echo "cache is found"
    CACHE_DIR="${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
    CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
    ls -l "$CACHE_ARCHIVE"
    mkdir -p "$CACHE_DIR"
    # Unpack the wheels into the artifacts directory, then drop the archive.
    # Failures are reported as a warning rather than aborting the step.
    du -h "$CACHE_ARCHIVE" &&
    tar -x -f "$CACHE_ARCHIVE" -C "$CACHE_DIR" &&
    rm -f "$CACHE_ARCHIVE" || echo "WARNING: cache could not be retrieved."
383
+
266
384
# Debugging aid: prints the working directory and a recursive listing of
# the cuda.bindings artifacts directory.
- name : Display structure of downloaded cuda.bindings artifacts
267
- shell : bash --noprofile --norc -xeuo pipefail {0}
268
385
run : |
269
386
pwd
270
387
ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
271
388
272
389
- name: Download cuda.core build artifacts
  id: cuda-core-download
  # A missing artifact is an expected situation (the wheels may live in GHA
  # Cache instead), so it must not mark the job as failed; otherwise every
  # later step with the implicit `success()` condition would be skipped.
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

- name: Restore cuda.core cache
  id: cuda-core-cache
  # `outcome` is the raw result of the download step before
  # `continue-on-error` is applied, so this runs only when the artifact
  # download actually failed.
  if: ${{ steps.cuda-core-download.outcome == 'failure' }}
  uses: actions/cache/restore@v4
  with:
    key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
    # Prefix fallback: accept an archive saved for a different commit.
    restore-keys: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}
    fail-on-cache-miss: true

- name: Report cache restore status (hit)
  # `cache-hit` is 'true' only for an exact key match; a `restore-keys`
  # fallback leaves it 'false'. `cache-matched-key` is non-empty for both
  # kinds of match, so it is the right signal for "an archive was restored".
  if: ${{ steps.cuda-core-cache.outcome == 'success' && steps.cuda-core-cache.outputs.cache-matched-key != '' }}
  run: |
    echo "cache is found"
    CACHE_DIR="${{ env.CUDA_CORE_ARTIFACTS_DIR }}"
    CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
    ls -l "$CACHE_ARCHIVE"
    mkdir -p "$CACHE_DIR"
    # Unpack the wheels into the artifacts directory, then drop the archive.
    # Failures are reported as a warning rather than aborting the step.
    du -h "$CACHE_ARCHIVE" &&
    tar -x -f "$CACHE_ARCHIVE" -C "$CACHE_DIR" &&
    rm -f "$CACHE_ARCHIVE" || echo "WARNING: cache could not be retrieved."
418
+
278
419
- name : Display structure of downloaded cuda.core build artifacts
279
- shell : bash --noprofile --norc -xeuo pipefail {0}
280
420
run : |
281
421
pwd
282
422
ls -lahR $CUDA_CORE_ARTIFACTS_DIR
@@ -295,7 +435,6 @@ jobs:
295
435
296
436
- name : Run cuda.bindings tests
297
437
if : ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
298
- shell : bash --noprofile --norc -xeuo pipefail {0}
299
438
run : |
300
439
ls $CUDA_PATH
301
440
@@ -311,7 +450,6 @@ jobs:
311
450
popd
312
451
313
452
- name : Run cuda.core tests
314
- shell : bash --noprofile --norc -xeuo pipefail {0}
315
453
run : |
316
454
if [[ ${{ matrix.python-version }} == "3.13" ]]; then
317
455
# TODO: remove this hack once cuda-python has a cp313 build
@@ -336,7 +474,7 @@ jobs:
336
474
doc :
337
475
name : Docs
338
476
# The build stage could fail but we want the CI to keep moving.
339
- if : ${{ github.repository_owner == 'nvidia' && always () }}
477
+ if : ${{ github.repository_owner == 'nvidia' && !cancelled () }}
340
478
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
341
479
permissions :
342
480
id-token : write
0 commit comments