     # (matrix.host-platform == 'win-64' && 'windows-amd64-cpu8') }}
     outputs:
       BUILD_CTK_VER: ${{ steps.pass_env.outputs.CUDA_VERSION }}
+    defaults:
+      run:
+        shell: bash --noprofile --norc -xeuo pipefail {0}
     steps:
       - name: Checkout ${{ github.event.repository.name }}
         uses: actions/checkout@v4
         uses: ilammy/msvc-dev-cmd@v1

       - name: Set environment variables
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
           if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -75,14 +77,25 @@ jobs:
           fi

           echo "PARALLEL_LEVEL=$(nproc)" >> $GITHUB_ENV
-          echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
+          CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
+          echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
+          echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
           echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
-          echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
+          CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
+          echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
+          echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
           echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
           echo "CIBW_BUILD=${CIBW_BUILD}" >> $GITHUB_ENV
-
+
+          # When the CI is run due to merging to main, we want it to populate GHA Cache not Artifacts,
+          # so that CI workflows running on every branch have a fallback to use.
+          if [[ "${{ github.ref_name }}" == main ]]; then
+            echo "USE_CACHE=1" >> $GITHUB_ENV
+          else
+            echo "USE_CACHE=0" >> $GITHUB_ENV
+          fi
+
       - name: Dump environment
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           env

           output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

       - name: List the cuda.core artifacts directory
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           if [[ "${{ matrix.host-platform }}" == win* ]]; then
             export CHOWN=chown
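The USE_CACHE switch introduced above works because anything appended to $GITHUB_ENV in one step becomes an env.* value for every later step in the same job, which is exactly what the new if: conditions below test. A minimal sketch of that hand-off, with illustrative step names and echo placeholders:

    steps:
      - name: Decide where build outputs go
        run: |
          if [[ "${{ github.ref_name }}" == main ]]; then
            echo "USE_CACHE=1" >> $GITHUB_ENV
          else
            echo "USE_CACHE=0" >> $GITHUB_ENV
          fi
      - name: Branch/PR path (GHA Artifacts)
        if: ${{ env.USE_CACHE == '0' }}
        run: echo "upload artifacts here"
      - name: Main path (GHA Cache)
        if: ${{ env.USE_CACHE == '1' }}
        run: echo "save to cache here"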
@@ -108,19 +120,40 @@ jobs:
           ls -lahR ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

       - name: Check cuda.core wheel
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           pip install twine
           twine check ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl

       - name: Upload cuda.core build artifacts
+        if: ${{ env.USE_CACHE == '0' }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
           path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
           if-no-files-found: error
           overwrite: 'true'

+      - name: Prepare cuda.core cache
+        if: ${{ env.USE_CACHE == '1' }}
+        run: |
+          if [[ "${{ env.USE_CACHE }}" == 1 ]]; then
+            # this file is uploaded to GHA Cache
+            tar -c -f "${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz" -C "${{ env.CUDA_CORE_ARTIFACTS_DIR }}" .
+            du -h "${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
+            # check if the previous runs from the same PR have populated the cache, if so need to clean it up
+            CACHE_KEY=${{ env.CUDA_CORE_ARTIFACT_NAME }}
+            if [ $(gh cache list | grep $CACHE_KEY | wc -l) == "1" ]; then
+              gh cache delete $CACHE_KEY
+            fi
+          fi
+
+      - name: Cache cuda.core build artifacts
+        if: ${{ env.USE_CACHE == '1' }}
+        uses: actions/cache/save@v4
+        with:
+          key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
+          path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
+
       - name: Set up mini CTK
         uses: ./.github/actions/fetch_ctk
         continue-on-error: false
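The delete-before-save logic in the "Prepare cuda.core cache" step above exists because a GHA cache entry is immutable once a key has been saved; without the cleanup, a re-run of the same commit would keep serving the stale archive. A condensed sketch of the pattern, assuming the gh CLI is authenticated (for example via a GH_TOKEN environment variable, which this excerpt does not show) and using illustrative names:

    - name: Refresh a cache entry keyed by commit
      env:
        GH_TOKEN: ${{ github.token }}   # assumption: token allowed to manage Actions caches
      run: |
        CACHE_KEY="example-artifact-${{ github.sha }}"   # illustrative key
        # delete any entry a previous run of this same commit left behind
        if [ "$(gh cache list | grep -c "$CACHE_KEY")" != "0" ]; then
          gh cache delete "$CACHE_KEY"
        fi
        tar -czf example-artifact.tar.gz -C dist .        # illustrative payload

    - name: Save the refreshed entry
      uses: actions/cache/save@v4
      with:
        key: example-artifact-${{ github.sha }}
        path: example-artifact.tar.gz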
@@ -146,7 +179,6 @@ jobs:
           output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

       - name: List the cuda.bindings artifacts directory
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           if [[ "${{ matrix.host-platform }}" == win* ]]; then
             export CHOWN=chown
@@ -158,18 +190,39 @@ jobs:

       # TODO: enable this after NVIDIA/cuda-python#297 is resolved
       # - name: Check cuda.bindings wheel
-      #   shell: bash --noprofile --norc -xeuo pipefail {0}
       #   run: |
       #     twine check ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl

+      - name: Prepare cuda.bindings cache
+        if: ${{ env.USE_CACHE == '1' }}
+        run: |
+          if [[ "${{ env.USE_CACHE }}" == 1 ]]; then
+            # this file is uploaded to GHA Cache
+            tar -c -f "${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz" -C "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" .
+            du -h "${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
+            # check if the previous runs from the same PR have populated the cache, if so need to clean it up
+            CACHE_KEY=${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
+            if [ $(gh cache list | grep $CACHE_KEY | wc -l) == "1" ]; then
+              gh cache delete $CACHE_KEY
+            fi
+          fi
+
       - name: Upload cuda.bindings build artifacts
+        if: ${{ env.USE_CACHE == '0' }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
           path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
           if-no-files-found: error
           overwrite: 'true'

+      - name: Cache cuda.bindings build artifacts
+        if: ${{ env.USE_CACHE == '1' }}
+        uses: actions/cache/save@v4
+        with:
+          key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
+          path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
+
       - name: Pass environment variables to the next runner
         id: pass_env
         run: |
@@ -205,7 +258,7 @@ jobs:
             runner: H100
     name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }})
     # The build stage could fail but we want the CI to keep moving.
-    if: ${{ github.repository_owner == 'nvidia' && always() }}
+    if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
     permissions:
       id-token: write # This is required for configure-aws-credentials
       contents: read # This is required for actions/checkout
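The condition change above is behavioral, not cosmetic: with always() the job runs even after the workflow has been cancelled, while !cancelled() still lets it run when an upstream job failed (the stated goal of keeping CI moving) but skips it once someone cancels the run. A small, self-contained illustration with made-up job names:

    jobs:
      upstream:
        runs-on: ubuntu-latest
        steps:
          - run: exit 1   # simulate a build failure
      runs-even-if-cancelled:
        needs: upstream
        if: ${{ always() }}
        runs-on: ubuntu-latest
        steps:
          - run: echo "runs on success, failure, or cancellation of the run"
      skipped-once-cancelled:
        needs: upstream
        if: ${{ !cancelled() }}
        runs-on: ubuntu-latest
        steps:
          - run: echo "runs after an upstream failure, but not after a cancel"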
@@ -221,9 +274,11 @@ jobs:
         NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
     needs:
       - build
+    defaults:
+      run:
+        shell: bash --noprofile --norc -xeuo pipefail {0}
     steps:
       - name: Ensure GPU is working
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: nvidia-smi

       - name: Checkout ${{ github.event.repository.name }}
@@ -232,7 +287,6 @@ jobs:
           fetch-depth: 0

       - name: Set environment variables
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
           if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -251,32 +305,83 @@ jobs:
           fi

           # make outputs from the previous job as env vars
-          echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
+          CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
+          echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
+          echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
           echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
-          echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
+          CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
+          echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
+          echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
           echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
           echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV

+      # We'll try GHA Artifacts first, and then fall back to GHA Cache
       - name: Download cuda.bindings build artifacts
+        id: cuda-bindings-download
         uses: actions/download-artifact@v4
         with:
           name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
           path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

+      - name: Restore cuda.bindings cache
+        if: ${{ failure() && steps.cuda-bindings-download.conclusion == 'failure' }}
+        id: cuda-bindings-cache
+        uses: actions/cache/restore@v4
+        with:
+          key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
+          path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
+          restore-keys: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}
+          fail-on-cache-miss: true
+
+      - name: Report cache restore status (hit)
+        if: ${{ steps.cuda-bindings-cache.conclusion != 'skipped' &&
+                steps.cuda-bindings-cache.outputs.cache-hit == 'true' }}
+        run: |
+          echo "cache is found"
+          CACHE_DIR="${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
+          CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
+          ls -l $CACHE_ARCHIVE
+          mkdir -p $CACHE_DIR
+          du -h $CACHE_ARCHIVE &&
+          tar -x -f $CACHE_ARCHIVE -C $CACHE_DIR &&
+          rm -f $CACHE_ARCHIVE || echo "WARNING: cache could not be retrieved."
+
       - name: Display structure of downloaded cuda.bindings artifacts
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           pwd
           ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR

       - name: Download cuda.core build artifacts
+        id: cuda-core-download
         uses: actions/download-artifact@v4
         with:
           name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
           path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

+      - name: Restore cuda.core cache
+        if: ${{ failure() && steps.cuda-core-download.conclusion == 'failure' }}
+        id: cuda-core-cache
+        uses: actions/cache/restore@v4
+        with:
+          key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
+          path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
+          restore-keys: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}
+          fail-on-cache-miss: true
+
+      - name: Report cache restore status (hit)
+        if: ${{ steps.cuda-core-cache.conclusion != 'skipped' &&
+                steps.cuda-core-cache.outputs.cache-hit == 'true' }}
+        run: |
+          echo "cache is found"
+          CACHE_DIR="${{ env.CUDA_CORE_ARTIFACTS_DIR }}"
+          CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
+          ls -l $CACHE_ARCHIVE
+          mkdir -p $CACHE_DIR
+          du -h $CACHE_ARCHIVE &&
+          tar -x -f $CACHE_ARCHIVE -C $CACHE_DIR &&
+          rm -f $CACHE_ARCHIVE || echo "WARNING: cache could not be retrieved."
+
       - name: Display structure of downloaded cuda.core build artifacts
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           pwd
           ls -lahR $CUDA_CORE_ARTIFACTS_DIR
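The two download/restore pairs above share one pattern that is easier to see in isolation: try actions/download-artifact first, and only when that step's conclusion is 'failure' fall back to actions/cache/restore, where restore-keys performs a prefix match on the basename so a branch run can pick up the newest cache saved from main even though the commit-keyed entry does not exist. A condensed sketch with illustrative names:

    - name: Download build artifacts
      id: download
      uses: actions/download-artifact@v4
      with:
        name: example-artifact-${{ github.sha }}
        path: dist

    - name: Fall back to the cache populated from main
      if: ${{ failure() && steps.download.conclusion == 'failure' }}
      uses: actions/cache/restore@v4
      with:
        key: example-artifact-${{ github.sha }}
        restore-keys: example-artifact    # prefix match; newest matching entry wins
        path: example-artifact.tar.gz
        fail-on-cache-miss: true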
@@ -295,7 +400,6 @@ jobs:

       - name: Run cuda.bindings tests
         if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           ls $CUDA_PATH

@@ -311,7 +415,6 @@ jobs:
           popd

       - name: Run cuda.core tests
-        shell: bash --noprofile --norc -xeuo pipefail {0}
         run: |
           if [[ ${{ matrix.python-version }} == "3.13" ]]; then
             # TODO: remove this hack once cuda-python has a cp313 build
@@ -336,7 +439,7 @@ jobs:
   doc:
     name: Docs
     # The build stage could fail but we want the CI to keep moving.
-    if: ${{ github.repository_owner == 'nvidia' && always() }}
+    if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
     # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
     permissions:
       id-token: write