Skip to content

Commit eb1efd8

Browse files
jrbyrnesaaryanshukla
authored andcommitted
[AMDGPU] Do not use original PHIs in coercion chains (llvm#98063)
It's possible that we are unable to coerce all the incoming values of a PHINode (A). Thus, we are unable to coerce the PHINode. In this situation, we previously would add the PHINode back to the ValMap. This would cause a problem is PhiNode (B) was a user of A. In this scenario, if B has been coerced, we would hit an assert regarding the incompatible type between the PHINode and its incoming value. Deleting non-coerced PHINodes from the map, and propagating the removal to users, resolves the issue.
1 parent d1a86a2 commit eb1efd8

File tree

2 files changed

+103
-6
lines changed

2 files changed

+103
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -365,13 +365,25 @@ bool LiveRegOptimizer::optimizeLiveType(
365365
else
366366
MissingIncVal = true;
367367
}
368-
Instruction *DeadInst = Phi;
368+
369369
if (MissingIncVal) {
370-
DeadInst = cast<Instruction>(ValMap[Phi]);
371-
// Do not use the dead phi
372-
ValMap[Phi] = Phi;
373-
}
374-
DeadInsts.emplace_back(DeadInst);
370+
Value *DeadVal = ValMap[Phi];
371+
// The coercion chain of the PHI is broken. Delete the Phi
372+
// from the ValMap and any connected / user Phis.
373+
SmallVector<Value *, 4> PHIWorklist;
374+
PHIWorklist.push_back(DeadVal);
375+
while (!PHIWorklist.empty()) {
376+
Value *NextDeadValue = PHIWorklist.pop_back_val();
377+
ValMap.erase(NextDeadValue);
378+
DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));
379+
380+
for (User *U : NextDeadValue->users()) {
381+
if (ValMap.contains(cast<PHINode>(U)))
382+
PHIWorklist.push_back(U);
383+
}
384+
}
385+
} else
386+
DeadInsts.emplace_back(cast<Instruction>(Phi));
375387
}
376388
// Coerce back to the original type and replace the uses.
377389
for (Instruction *U : Uses) {

llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,5 +866,90 @@ bb.3:
866866
ret void
867867
}
868868

869+
; This should not cause Assertion `getType() == V->getType() && "All operands to PHI node must be the same type as the PHI node
870+
; Note: whether or not the assertion fires depends on the iteration ortder of PhiNodes in AMDGPULateCodeGenPrepare, which
871+
; is non-deterministic due to iterators over a set of pointers.
872+
873+
874+
define amdgpu_kernel void @MissingInc_PhiChain(i1 %cmp, <16 x i8> %input) {
875+
; GFX906-LABEL: MissingInc_PhiChain:
876+
; GFX906: ; %bb.0: ; %entry
877+
; GFX906-NEXT: s_load_dword s2, s[0:1], 0x24
878+
; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
879+
; GFX906-NEXT: s_mov_b32 s10, 1
880+
; GFX906-NEXT: v_mov_b32_e32 v4, 1
881+
; GFX906-NEXT: s_mov_b32 s11, 1
882+
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
883+
; GFX906-NEXT: s_bitcmp1_b32 s2, 0
884+
; GFX906-NEXT: s_cselect_b64 s[2:3], -1, 0
885+
; GFX906-NEXT: s_xor_b64 s[0:1], s[2:3], -1
886+
; GFX906-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
887+
; GFX906-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
888+
; GFX906-NEXT: s_branch .LBB14_2
889+
; GFX906-NEXT: .LBB14_1: ; %bb.5
890+
; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
891+
; GFX906-NEXT: v_lshrrev_b32_e32 v4, 8, v0
892+
; GFX906-NEXT: s_mov_b32 s10, 0
893+
; GFX906-NEXT: s_mov_b32 s11, 0
894+
; GFX906-NEXT: .LBB14_2: ; %bb.1
895+
; GFX906-NEXT: ; =>This Inner Loop Header: Depth=1
896+
; GFX906-NEXT: s_and_b64 vcc, exec, s[0:1]
897+
; GFX906-NEXT: s_mov_b64 s[8:9], s[2:3]
898+
; GFX906-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
899+
; GFX906-NEXT: s_cbranch_vccnz .LBB14_4
900+
; GFX906-NEXT: ; %bb.3: ; %bb.2
901+
; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
902+
; GFX906-NEXT: v_lshlrev_b16_e64 v0, 8, s11
903+
; GFX906-NEXT: v_or_b32_sdwa v0, s10, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
904+
; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v4
905+
; GFX906-NEXT: v_or_b32_e32 v0, v1, v0
906+
; GFX906-NEXT: s_mov_b64 s[8:9], -1
907+
; GFX906-NEXT: .LBB14_4: ; %Flow
908+
; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
909+
; GFX906-NEXT: s_andn2_b64 vcc, exec, s[8:9]
910+
; GFX906-NEXT: s_cbranch_vccnz .LBB14_7
911+
; GFX906-NEXT: ; %bb.5: ; %bb.3
912+
; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
913+
; GFX906-NEXT: s_and_b64 vcc, exec, s[0:1]
914+
; GFX906-NEXT: s_cbranch_vccnz .LBB14_1
915+
; GFX906-NEXT: ; %bb.6: ; %bb.4
916+
; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
917+
; GFX906-NEXT: v_mov_b32_e32 v0, s4
918+
; GFX906-NEXT: v_mov_b32_e32 v1, s5
919+
; GFX906-NEXT: v_mov_b32_e32 v2, s6
920+
; GFX906-NEXT: v_mov_b32_e32 v3, s7
921+
; GFX906-NEXT: s_branch .LBB14_1
922+
; GFX906-NEXT: .LBB14_7: ; in Loop: Header=BB14_2 Depth=1
923+
; GFX906-NEXT: ; implicit-def: $vgpr4
924+
; GFX906-NEXT: ; implicit-def: $sgpr10
925+
; GFX906-NEXT: ; implicit-def: $sgpr11
926+
; GFX906-NEXT: s_cbranch_execz .LBB14_2
927+
; GFX906-NEXT: ; %bb.8: ; %DummyReturnBlock
928+
; GFX906-NEXT: s_endpgm
929+
entry:
930+
br label %bb.1
931+
932+
bb.1: ; preds = %bb.5, %entry
933+
%phi1 = phi <16 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %entry ], [ %shuffle, %bb.5 ]
934+
br i1 %cmp, label %bb.3, label %bb.2
935+
936+
bb.2: ; preds = %bb.1
937+
%insert = insertelement <16 x i8> %phi1, i8 0, i64 0
938+
br label %bb.3
939+
940+
bb.3: ; preds = %bb.2, %bb.1
941+
%phi2 = phi <16 x i8> [ %insert, %bb.2 ], [ %phi1, %bb.1 ]
942+
br i1 %cmp, label %bb.5, label %bb.4
943+
944+
bb.4: ; preds = %bb.3
945+
br label %bb.5
946+
947+
bb.5: ; preds = %bb.4, %bb.3
948+
%phi3 = phi <16 x i8> [ %input, %bb.4 ], [ %phi2, %bb.3 ]
949+
%shuffle = shufflevector <16 x i8> %phi3, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
950+
br label %bb.1
951+
}
952+
953+
869954

870955
declare i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)