Skip to content

Commit eb52809

Browse files
authored
[NVPTX] Fixup AutoUpgrade of llvm.nvvm.atomic.load.{inc,dec}.32 (#138907)
The previous implementation failed to account for the fact that these intrinsics are overloaded on their pointer type, so their names carry a pointer-type suffix (e.g. `.p0`). This version matches the suffixed names and adds tests for llvm.nvvm.atomic.load.add.{f32,f64}.
1 parent 3ed158f commit eb52809

File tree

2 files changed

+24
-13
lines changed

2 files changed

+24
-13
lines changed

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,12 +1353,15 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
13531353
// nvvm.{min,max}.{i,ii,ui,ull}
13541354
Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
13551355
Name == "ui" || Name == "ull";
1356-
else if (Name.consume_front("atomic.load.add."))
1357-
// nvvm.atomic.load.add.{f32.p,f64.p}
1358-
Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1359-
else if (Name.consume_front("atomic.load.") && Name.consume_back(".32"))
1360-
// nvvm.atomic.load.{inc,dec}.32
1361-
Expand = Name == "inc" || Name == "dec";
1356+
else if (Name.consume_front("atomic.load."))
1357+
// nvvm.atomic.load.add.{f32,f64}.p
1358+
// nvvm.atomic.load.{inc,dec}.32.p
1359+
Expand = StringSwitch<bool>(Name)
1360+
.StartsWith("add.f32.p", true)
1361+
.StartsWith("add.f64.p", true)
1362+
.StartsWith("inc.32.p", true)
1363+
.StartsWith("dec.32.p", true)
1364+
.Default(false);
13621365
else if (Name.consume_front("bitcast."))
13631366
// nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
13641367
Expand =
@@ -2383,10 +2386,12 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
23832386
Value *Val = CI->getArgOperand(1);
23842387
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
23852388
AtomicOrdering::SequentiallyConsistent);
2386-
} else if (Name.consume_front("atomic.load.") && Name.consume_back(".32")) {
2389+
} else if (Name.starts_with("atomic.load.inc.32.p") ||
2390+
Name.starts_with("atomic.load.dec.32.p")) {
23872391
Value *Ptr = CI->getArgOperand(0);
23882392
Value *Val = CI->getArgOperand(1);
2389-
auto Op = Name == "inc" ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2393+
auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2394+
: AtomicRMWInst::UDecWrap;
23902395
Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
23912396
AtomicOrdering::SequentiallyConsistent);
23922397
} else if (Name.consume_front("max.") &&

llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,10 @@ declare i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr, i32)
5858
declare ptr @llvm.nvvm.ldg.global.p.p0(ptr, i32)
5959
declare float @llvm.nvvm.ldg.global.f.f32.p0(ptr, i32)
6060

61-
declare i32 @llvm.nvvm.atomic.load.inc.32(ptr, i32)
62-
declare i32 @llvm.nvvm.atomic.load.dec.32(ptr, i32)
61+
declare i32 @llvm.nvvm.atomic.load.inc.32.p0(ptr, i32)
62+
declare i32 @llvm.nvvm.atomic.load.dec.32.p0(ptr, i32)
63+
declare i32 @llvm.nvvm.atomic.load.add.f32.p0(ptr, float)
64+
declare i32 @llvm.nvvm.atomic.load.add.f64.p0(ptr, double)
6365

6466
declare ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3), i32)
6567

@@ -267,12 +269,16 @@ define void @ldg(ptr %p0, ptr addrspace(1) %p1) {
267269
}
268270

269271
; CHECK-LABEL: @atomics
270-
define i32 @atomics(ptr %p0, i32 %a) {
272+
define i32 @atomics(ptr %p0, i32 %a, float %b, double %c) {
271273
; CHECK: %1 = atomicrmw uinc_wrap ptr %p0, i32 %a seq_cst
272274
; CHECK: %2 = atomicrmw udec_wrap ptr %p0, i32 %a seq_cst
275+
; CHECK: %3 = atomicrmw fadd ptr %p0, float %b seq_cst
276+
; CHECK: %4 = atomicrmw fadd ptr %p0, double %c seq_cst
273277

274-
%r1 = call i32 @llvm.nvvm.atomic.load.inc.32(ptr %p0, i32 %a)
275-
%r2 = call i32 @llvm.nvvm.atomic.load.dec.32(ptr %p0, i32 %a)
278+
%r1 = call i32 @llvm.nvvm.atomic.load.inc.32.p0(ptr %p0, i32 %a)
279+
%r2 = call i32 @llvm.nvvm.atomic.load.dec.32.p0(ptr %p0, i32 %a)
280+
%r3 = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %p0, float %b)
281+
%r4 = call double @llvm.nvvm.atomic.load.add.f64.p0(ptr %p0, double %c)
276282
ret i32 %r2
277283
}
278284

0 commit comments

Comments
 (0)