Skip to content

Commit 5c375c3

Browse files
committed
AMDGPU: Fix worklist management in simplifyDemandedVectorEltsIntrinsic
Fixes the sanitizer error reported by the build bots. However, for some reason this leaves a dead call instruction behind if the call has an operand bundle (e.g. a convergence control bundle).
1 parent 36cd601 commit 5c375c3

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,11 +1599,10 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
15991599
// TODO: Preserve callsite attributes?
16001600
CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
16011601

1602-
Value *Result = IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
1603-
NewCall, FirstElt);
1604-
IC.replaceInstUsesWith(II, Result);
1605-
IC.eraseInstFromFunction(II);
1606-
return Result;
1602+
// FIXME: If the call has a convergence bundle, we end up leaving the dead
1603+
// call behind.
1604+
return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()), NewCall,
1605+
FirstElt);
16071606
}
16081607

16091608
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(

llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(<2 x i32> %src) co
349349
; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(
350350
; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR1:[0-9]+]] {
351351
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
352+
; CHECK-NEXT: [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) [ "convergencectrl"(token [[T]]) ]
352353
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[SRC]], i64 0
353354
; CHECK-NEXT: [[ELT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP1]]) [ "convergencectrl"(token [[T]]) ]
354355
; CHECK-NEXT: ret i32 [[ELT]]

0 commit comments

Comments
 (0)