AMDGPU: Do not create phi user for atomicrmw with no uses #103061
Conversation
Currently the custom expansion that introduces address space tests always creates a phi for the return value, even if it is unused. Avoid introducing the phi if we don't need the return value. In a future patch that enables incremental legalization of atomics, this avoids unnecessarily expanding the global sequence on targets that only support the no-return version.
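A minimal sketch of the pattern, assuming LLVM's C++ IR API: the helper name `replaceAtomicResult` and its `Incoming` parameter are hypothetical, but the `use_empty()` guard and the `takeName`/`replaceAllUsesWith`/`eraseFromParent` sequence mirror the diff below.

```cpp
// Sketch only: guard phi creation on whether the atomicrmw result is read.
// `replaceAtomicResult` and `Incoming` are illustrative names, not the
// patch's actual interface.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include <utility>
using namespace llvm;

static void replaceAtomicResult(
    AtomicRMWInst *AI, IRBuilder<> &Builder,
    ArrayRef<std::pair<Value *, BasicBlock *>> Incoming) {
  // Only materialize a phi when something actually reads the old result.
  if (!AI->use_empty()) {
    PHINode *Loaded = Builder.CreatePHI(AI->getType(), Incoming.size());
    for (const auto &[V, BB] : Incoming)
      Loaded->addIncoming(V, BB);
    Loaded->takeName(AI);
    AI->replaceAllUsesWith(Loaded);
  }
  AI->eraseFromParent();
}
```

An unused phi would still count as a use of each per-address-space result, which is what forces the returning form of the global sequence; dropping it lets a later patch select the no-return variant.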
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Full diff: https://github.com/llvm/llvm-project/pull/103061.diff

3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1cf9fb7a3724b7..261e5fcb2d467f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16647,6 +16647,9 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
IRBuilder<> Builder(AI);
LLVMContext &Ctx = Builder.getContext();
+ // If the return value isn't used, do not introduce a false use in the phi.
+ bool ReturnValueIsUsed = !AI->use_empty();
+
BasicBlock *BB = Builder.GetInsertBlock();
Function *F = BB->getParent();
BasicBlock *ExitBB =
@@ -16710,14 +16713,18 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
Builder.CreateBr(PhiBB);
Builder.SetInsertPoint(PhiBB);
- PHINode *Loaded = Builder.CreatePHI(ValTy, 3);
- Loaded->addIncoming(LoadedShared, SharedBB);
- Loaded->addIncoming(LoadedPrivate, PrivateBB);
- Loaded->addIncoming(LoadedGlobal, GlobalBB);
+
+ if (ReturnValueIsUsed) {
+ PHINode *Loaded = Builder.CreatePHI(ValTy, 3);
+ Loaded->addIncoming(LoadedShared, SharedBB);
+ Loaded->addIncoming(LoadedPrivate, PrivateBB);
+ Loaded->addIncoming(LoadedGlobal, GlobalBB);
+ Loaded->takeName(AI);
+ AI->replaceAllUsesWith(Loaded);
+ }
+
Builder.CreateBr(ExitBB);
- Loaded->takeName(AI);
- AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
}
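For the predicate itself, here is a hedged, self-contained demo (not part of the patch; the function and variable names are illustrative) that builds one consumed and one unconsumed `atomicrmw fadd` and reports which of the two would receive a phi under the new logic:

```cpp
// Illustrative demo: only the atomicrmw whose result is consumed reports
// that it needs a phi. Assumes a recent LLVM (opaque pointers, aligned
// CreateAtomicRMW); everything here is demo scaffolding, not patch code.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Type *FloatTy = Type::getFloatTy(Ctx);
  PointerType *PtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
  Function *F = Function::Create(
      FunctionType::get(FloatTy, {PtrTy, FloatTy}, /*isVarArg=*/false),
      Function::ExternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  Value *Ptr = F->getArg(0), *Val = F->getArg(1);
  auto *Used = B.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(4),
                                 AtomicOrdering::SequentiallyConsistent);
  auto *Unused = B.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(4),
                                   AtomicOrdering::SequentiallyConsistent);
  B.CreateRet(Used); // only the first result is consumed

  outs() << "used atomicrmw needs phi:   "
         << (Used->use_empty() ? "no" : "yes") << "\n";   // yes
  outs() << "unused atomicrmw needs phi: "
         << (Unused->use_empty() ? "no" : "yes") << "\n"; // no
}
```

This matches the test updates below: the removed `phi float` lines all sit in the no-return (`nortn`) functions, where the expansion's result is never read.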
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
index 78969839efcb8a..8fa41d0bc61eb8 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
@@ -147,7 +147,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
-; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
index fc586a01e3bcf8..a8b54ac33d9042 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
@@ -166,7 +166,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.phi:
-; GFX908-NEXT: [[RES:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret void
@@ -192,7 +191,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
-; GFX90A-NEXT: [[RES:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void