File tree Expand file tree Collapse file tree 2 files changed +18
-4
lines changed Expand file tree Collapse file tree 2 files changed +18
-4
lines changed Original file line number Diff line number Diff line change @@ -262,9 +262,16 @@ void CodeGen_PTX_Dev::visit(const Call *op) {
262
262
auto fence_type_ptr = as_const_int (op->args [0 ]);
263
263
internal_assert (fence_type_ptr) << " gpu_thread_barrier() parameter is not a constant integer.\n " ;
264
264
265
- llvm::Function *barrier0 = module ->getFunction (" llvm.nvvm.barrier0" );
266
- internal_assert (barrier0) << " Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n " ;
267
- builder->CreateCall (barrier0);
265
+ llvm::Function *barrier;
266
+ if ((barrier = module ->getFunction (" llvm.nvvm.barrier.cta.sync.aligned.all" )) && barrier->getIntrinsicID () != 0 ) {
267
+ // LLVM 20.1.6 and above: https://github.com/llvm/llvm-project/pull/140615
268
+ builder->CreateCall (barrier, builder->getInt32 (0 ));
269
+ } else if ((barrier = module ->getFunction (" llvm.nvvm.barrier0" )) && barrier->getIntrinsicID () != 0 ) {
270
+ // LLVM 20.1.5 and below: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 20
271
+ builder->CreateCall (barrier);
272
+ } else {
273
+ internal_error << " Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n " ;
274
+ }
268
275
value = ConstantInt::get (i32_t , 0 );
269
276
return ;
270
277
}
Original file line number Diff line number Diff line change 1
- declare void @llvm.nvvm.barrier0 ()
1
+ ; The two forward declared intrinsics below refer to the same thing.
2
+ ; LLVM 20.1.6 introduced a new naming scheme for these intrinsics.
3
+ ; We have to declare both, so that we can access them from the Module's
4
+ ; getFunction(), but one of those will map to an intrinsic, which we
5
+ ; will use to determine which intrinsic is supported by LLVM.
6
+ declare void @llvm.nvvm.barrier0 () ; LLVM <=20.1.5
7
+ declare void @llvm.nvvm.barrier.cta.sync.aligned.all (i32 ) ; LLVM >=20.1.6
8
+
2
9
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x ()
3
10
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x ()
4
11
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x ()
You can’t perform that action at this time.
0 commit comments