Skip to content

Commit 60621b8

Browse files
authored
Fix top LLVM: renamed NVPTX barrier intrinsics. (#8631)
LLVM PTX barrier intrinsics changed name: llvm/llvm-project#141143. Now trying both, and using llvm::Function::getIntrinsicID to figure out which one is actually the working one.
1 parent 85a3b07 commit 60621b8

File tree

2 files changed

+18
-4
lines changed

2 files changed

+18
-4
lines changed

src/CodeGen_PTX_Dev.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,16 @@ void CodeGen_PTX_Dev::visit(const Call *op) {
262262
auto fence_type_ptr = as_const_int(op->args[0]);
263263
internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n";
264264

265-
llvm::Function *barrier0 = module->getFunction("llvm.nvvm.barrier0");
266-
internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n";
267-
builder->CreateCall(barrier0);
265+
llvm::Function *barrier;
266+
if ((barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all")) && barrier->getIntrinsicID() != 0) {
267+
// LLVM 20.1.6 and above: https://github.com/llvm/llvm-project/pull/140615
268+
builder->CreateCall(barrier, builder->getInt32(0));
269+
} else if ((barrier = module->getFunction("llvm.nvvm.barrier0")) && barrier->getIntrinsicID() != 0) {
270+
// LLVM 20.1.5 and below: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 20
271+
builder->CreateCall(barrier);
272+
} else {
273+
internal_error << "Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n";
274+
}
268275
value = ConstantInt::get(i32_t, 0);
269276
return;
270277
}

src/runtime/ptx_dev.ll

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
declare void @llvm.nvvm.barrier0()
1+
; The two forward declared intrinsics below refer to the same thing.
2+
; LLVM 20.1.6 introduced a new naming scheme for these intrinsics.
3+
; We have to declare both, such that we can access them from the Module's
4+
; getFunction(), but only one of those will map to an intrinsic, which we
5+
; will use to determine which intrinsic is supported by LLVM.
6+
declare void @llvm.nvvm.barrier0() ; LLVM <=20.1.5
7+
declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32) ; LLVM >=20.1.6
8+
29
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
310
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
411
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()

0 commit comments

Comments
 (0)