Skip to content

Commit 62a737f

Browse files
jrbyrnesyuxuanchen1997
authored andcommitted
[AMDGPU] Reland: Do not use original PHIs in coercion chains
Summary: Change-Id: I579b5c69a85997f168ed35354b326524b6f84ef7 Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251418
1 parent d5ea308 commit 62a737f

File tree

2 files changed

+260
-10
lines changed

2 files changed

+260
-10
lines changed

llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -365,29 +365,60 @@ bool LiveRegOptimizer::optimizeLiveType(
365365
else
366366
MissingIncVal = true;
367367
}
368-
Instruction *DeadInst = Phi;
369368
if (MissingIncVal) {
370-
DeadInst = cast<Instruction>(ValMap[Phi]);
371-
// Do not use the dead phi
372-
ValMap[Phi] = Phi;
369+
Value *DeadVal = ValMap[Phi];
370+
// The coercion chain of the PHI is broken. Delete the Phi
371+
// from the ValMap and any connected / user Phis.
372+
SmallVector<Value *, 4> PHIWorklist;
373+
SmallPtrSet<Value *, 4> Visited;
374+
PHIWorklist.push_back(DeadVal);
375+
while (!PHIWorklist.empty()) {
376+
Value *NextDeadValue = PHIWorklist.pop_back_val();
377+
Visited.insert(NextDeadValue);
378+
auto OriginalPhi =
379+
std::find_if(PhiNodes.begin(), PhiNodes.end(),
380+
[this, &NextDeadValue](PHINode *CandPhi) {
381+
return ValMap[CandPhi] == NextDeadValue;
382+
});
383+
// This PHI may have already been removed from maps when
384+
// unwinding a previous Phi
385+
if (OriginalPhi != PhiNodes.end())
386+
ValMap.erase(*OriginalPhi);
387+
388+
DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));
389+
390+
for (User *U : NextDeadValue->users()) {
391+
if (!Visited.contains(cast<PHINode>(U)))
392+
PHIWorklist.push_back(U);
393+
}
394+
}
395+
} else {
396+
DeadInsts.emplace_back(cast<Instruction>(Phi));
373397
}
374-
DeadInsts.emplace_back(DeadInst);
375398
}
376399
// Coerce back to the original type and replace the uses.
377400
for (Instruction *U : Uses) {
378401
// Replace all converted operands for a use.
379402
for (auto [OpIdx, Op] : enumerate(U->operands())) {
380-
if (ValMap.contains(Op)) {
403+
if (ValMap.contains(Op) && ValMap[Op]) {
381404
Value *NewVal = nullptr;
382405
if (BBUseValMap.contains(U->getParent()) &&
383406
BBUseValMap[U->getParent()].contains(ValMap[Op]))
384407
NewVal = BBUseValMap[U->getParent()][ValMap[Op]];
385408
else {
386409
BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
387-
NewVal =
388-
convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
389-
InsertPt, U->getParent());
390-
BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
410+
// We may pick up ops that were previously converted for users in
411+
// other blocks. If there is an originally typed definition of the Op
412+
// already in this block, simply reuse it.
413+
if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
414+
U->getParent() == cast<Instruction>(Op)->getParent()) {
415+
NewVal = Op;
416+
} else {
417+
NewVal =
418+
convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
419+
InsertPt, U->getParent());
420+
BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
421+
}
391422
}
392423
assert(NewVal);
393424
U->setOperand(OpIdx, NewVal);

llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,4 +349,223 @@ bb.2:
349349
ret void
350350
}
351351

352+
; Should not produce a broken phi
353+
354+
define void @broken_phi() {
355+
; GFX906-LABEL: define void @broken_phi(
356+
; GFX906-SAME: ) #[[ATTR0]] {
357+
; GFX906-NEXT: bb:
358+
; GFX906-NEXT: br label [[BB1:%.*]]
359+
; GFX906: bb1:
360+
; GFX906-NEXT: [[I:%.*]] = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
361+
; GFX906-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]]
362+
; GFX906: bb2:
363+
; GFX906-NEXT: br label [[BB3]]
364+
; GFX906: bb3:
365+
; GFX906-NEXT: [[I4:%.*]] = phi <4 x i8> [ zeroinitializer, [[BB2]] ], [ [[I]], [[BB1]] ]
366+
; GFX906-NEXT: br i1 false, label [[BB7]], label [[BB5:%.*]]
367+
; GFX906: bb5:
368+
; GFX906-NEXT: [[I6:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[I4]], <4 x i8> zeroinitializer)
369+
; GFX906-NEXT: br label [[BB7]]
370+
; GFX906: bb7:
371+
; GFX906-NEXT: [[I8]] = phi <4 x i8> [ zeroinitializer, [[BB5]] ], [ zeroinitializer, [[BB3]] ]
372+
; GFX906-NEXT: br label [[BB1]]
373+
;
374+
bb:
375+
br label %bb1
376+
bb1:
377+
%i = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %bb ], [ %i8, %bb7 ]
378+
br i1 false, label %bb3, label %bb2
379+
bb2:
380+
br label %bb3
381+
bb3:
382+
%i4 = phi <4 x i8> [ zeroinitializer, %bb2 ], [ %i, %bb1 ]
383+
br i1 false, label %bb7, label %bb5
384+
bb5:
385+
%i6 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %i4, <4 x i8> zeroinitializer)
386+
br label %bb7
387+
bb7:
388+
%i8 = phi <4 x i8> [ zeroinitializer, %bb5 ], [ zeroinitializer, %bb3 ]
389+
br label %bb1
390+
}
391+
392+
; %sel1 should just use %sel0 instead of trying to convert back the
393+
; converted version of %sel0
394+
395+
define amdgpu_kernel void @reuseOp() {
396+
; GFX906-LABEL: define amdgpu_kernel void @reuseOp(
397+
; GFX906-SAME: ) #[[ATTR0]] {
398+
; GFX906-NEXT: entry:
399+
; GFX906-NEXT: [[VEC1:%.*]] = insertelement <16 x i8> zeroinitializer, i8 0, i64 0
400+
; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <16 x i8> [[VEC1]] to <4 x i32>
401+
; GFX906-NEXT: br label [[BB_1:%.*]]
402+
; GFX906: bb.1:
403+
; GFX906-NEXT: [[VEC1_BC_BC:%.*]] = bitcast <4 x i32> [[VEC1_BC]] to <16 x i8>
404+
; GFX906-NEXT: [[SEL0:%.*]] = select i1 false, <16 x i8> zeroinitializer, <16 x i8> zeroinitializer
405+
; GFX906-NEXT: [[SEL0_BC:%.*]] = bitcast <16 x i8> [[SEL0]] to <4 x i32>
406+
; GFX906-NEXT: [[SEL1:%.*]] = select i1 false, <16 x i8> [[VEC1_BC_BC]], <16 x i8> [[SEL0]]
407+
; GFX906-NEXT: br label [[BB_2:%.*]]
408+
; GFX906: bb.2:
409+
; GFX906-NEXT: [[SEL0_BC_BC:%.*]] = bitcast <4 x i32> [[SEL0_BC]] to <16 x i8>
410+
; GFX906-NEXT: [[VAL:%.*]] = extractelement <16 x i8> [[SEL0_BC_BC]], i64 0
411+
; GFX906-NEXT: ret void
412+
;
413+
entry:
414+
%vec1 = insertelement <16 x i8> zeroinitializer, i8 0, i64 0
415+
br label %bb.1
416+
417+
bb.1:
418+
%sel0 = select i1 false, <16 x i8> zeroinitializer, <16 x i8> zeroinitializer
419+
%sel1 = select i1 false, <16 x i8> %vec1, <16 x i8> %sel0
420+
br label %bb.2
421+
422+
bb.2:
423+
%val = extractelement <16 x i8> %sel0, i64 0
424+
ret void
425+
}
426+
427+
428+
define amdgpu_kernel void @deletedPHI(i32 %in0, i1 %cmp, <10 x i8> %invec0) {
429+
; GFX906-LABEL: define amdgpu_kernel void @deletedPHI(
430+
; GFX906-SAME: i32 [[IN0:%.*]], i1 [[CMP:%.*]], <10 x i8> [[INVEC0:%.*]]) #[[ATTR0]] {
431+
; GFX906-NEXT: entry:
432+
; GFX906-NEXT: br label [[BB_1:%.*]]
433+
; GFX906: bb.1:
434+
; GFX906-NEXT: [[PHI0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB_11:%.*]] ]
435+
; GFX906-NEXT: [[PHI1:%.*]] = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ]
436+
; GFX906-NEXT: br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]]
437+
; GFX906: bb.2:
438+
; GFX906-NEXT: br label [[BB_3]]
439+
; GFX906: bb.3:
440+
; GFX906-NEXT: [[PHI2:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_2]] ], [ [[PHI1]], [[BB_1]] ]
441+
; GFX906-NEXT: br i1 [[CMP]], label [[BB_5:%.*]], label [[BB_4:%.*]]
442+
; GFX906: bb.4:
443+
; GFX906-NEXT: [[VEC0:%.*]] = insertelement <10 x i8> [[PHI2]], i8 0, i64 0
444+
; GFX906-NEXT: br label [[BB_5]]
445+
; GFX906: bb.5:
446+
; GFX906-NEXT: [[PHI3:%.*]] = phi <10 x i8> [ [[VEC0]], [[BB_4]] ], [ [[PHI2]], [[BB_3]] ]
447+
; GFX906-NEXT: br i1 [[CMP]], label [[BB_7:%.*]], label [[BB_6:%.*]]
448+
; GFX906: bb.6:
449+
; GFX906-NEXT: br label [[BB_7]]
450+
; GFX906: bb.7:
451+
; GFX906-NEXT: [[PHI4:%.*]] = phi <10 x i8> [ [[INVEC0]], [[BB_6]] ], [ [[PHI3]], [[BB_5]] ]
452+
; GFX906-NEXT: br i1 [[CMP]], label [[BB_9:%.*]], label [[BB_8:%.*]]
453+
; GFX906: bb.8:
454+
; GFX906-NEXT: br label [[BB_9]]
455+
; GFX906: bb.9:
456+
; GFX906-NEXT: [[PHI5:%.*]] = phi <10 x i8> [ [[INVEC0]], [[BB_8]] ], [ [[PHI4]], [[BB_7]] ]
457+
; GFX906-NEXT: br i1 [[CMP]], label [[BB_11]], label [[BB_10:%.*]]
458+
; GFX906: bb.10:
459+
; GFX906-NEXT: br label [[BB_11]]
460+
; GFX906: bb.11:
461+
; GFX906-NEXT: [[PHI6:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_10]] ], [ [[PHI5]], [[BB_9]] ]
462+
; GFX906-NEXT: [[VEC1]] = shufflevector <10 x i8> [[PHI6]], <10 x i8> zeroinitializer, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 15, i32 16, i32 17, i32 18, i32 19>
463+
; GFX906-NEXT: br label [[BB_1]]
464+
;
465+
entry:
466+
br label %bb.1
467+
468+
bb.1:
469+
%phi0 = phi i32 [ 0, %entry ], [ 1, %bb.11 ]
470+
%phi1 = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %entry ], [ %vec1, %bb.11 ]
471+
br i1 %cmp, label %bb.3, label %bb.2
472+
473+
bb.2:
474+
br label %bb.3
475+
476+
bb.3:
477+
%phi2 = phi <10 x i8> [ zeroinitializer, %bb.2 ], [ %phi1, %bb.1 ]
478+
br i1 %cmp, label %bb.5, label %bb.4
479+
480+
bb.4:
481+
%vec0 = insertelement <10 x i8> %phi2, i8 0, i64 0
482+
br label %bb.5
483+
484+
bb.5: ; preds = %bb.4, %bb.3
485+
%phi3 = phi <10 x i8> [ %vec0, %bb.4 ], [ %phi2, %bb.3 ]
486+
br i1 %cmp, label %bb.7, label %bb.6
487+
488+
bb.6:
489+
br label %bb.7
490+
491+
bb.7: ; preds = %bb.6, %bb.5
492+
%phi4 = phi <10 x i8> [ %invec0, %bb.6 ], [ %phi3, %bb.5 ]
493+
br i1 %cmp, label %bb.9, label %bb.8
494+
495+
bb.8:
496+
br label %bb.9
497+
498+
bb.9:
499+
%phi5 = phi <10 x i8> [ %invec0, %bb.8 ], [ %phi4, %bb.7 ]
500+
br i1 %cmp, label %bb.11, label %bb.10
501+
502+
bb.10:
503+
br label %bb.11
504+
505+
bb.11:
506+
%phi6 = phi <10 x i8> [ zeroinitializer, %bb.10 ], [ %phi5, %bb.9 ]
507+
%vec1 = shufflevector <10 x i8> %phi6, <10 x i8> zeroinitializer, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 15, i32 16, i32 17, i32 18, i32 19>
508+
br label %bb.1
509+
}
510+
511+
define amdgpu_kernel void @multiple_unwind(i1 %cmp, <10 x i8> %invec) {
512+
; GFX906-LABEL: define amdgpu_kernel void @multiple_unwind(
513+
; GFX906-SAME: i1 [[CMP:%.*]], <10 x i8> [[INVEC:%.*]]) #[[ATTR0]] {
514+
; GFX906-NEXT: entry:
515+
; GFX906-NEXT: br label [[BB_1:%.*]]
516+
; GFX906: bb.1:
517+
; GFX906-NEXT: [[PHI0:%.*]] = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ]
518+
; GFX906-NEXT: br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]]
519+
; GFX906: bb.2:
520+
; GFX906-NEXT: br label [[BB_3]]
521+
; GFX906: bb.3:
522+
; GFX906-NEXT: [[PHI1:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_2]] ], [ [[PHI0]], [[BB_1]] ]
523+
; GFX906-NEXT: br i1 [[CMP]], label [[BB_5:%.*]], label [[BB_4:%.*]]
524+
; GFX906: bb.4:
525+
; GFX906-NEXT: br label [[BB_5]]
526+
; GFX906: bb.5:
527+
; GFX906-NEXT: [[PHI2:%.*]] = phi <10 x i8> [ [[PHI0]], [[BB_4]] ], [ [[PHI1]], [[BB_3]] ]
528+
; GFX906-NEXT: br i1 [[CMP]], label [[BB_7:%.*]], label [[BB_6:%.*]]
529+
; GFX906: bb.6:
530+
; GFX906-NEXT: br label [[BB_7]]
531+
; GFX906: bb.7:
532+
; GFX906-NEXT: [[PHI3]] = phi <10 x i8> [ [[INVEC]], [[BB_6]] ], [ [[PHI2]], [[BB_5]] ]
533+
; GFX906-NEXT: br label [[BB_8]]
534+
; GFX906: bb.8:
535+
; GFX906-NEXT: br label [[BB_1]]
536+
;
537+
entry:
538+
br label %bb.1
539+
540+
bb.1:
541+
%phi0 = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %entry ], [ %phi3, %bb.8 ]
542+
br i1 %cmp, label %bb.3, label %bb.2
543+
544+
bb.2:
545+
br label %bb.3
546+
547+
bb.3:
548+
%phi1 = phi <10 x i8> [ zeroinitializer, %bb.2 ], [ %phi0, %bb.1 ]
549+
br i1 %cmp, label %bb.5, label %bb.4
550+
551+
bb.4:
552+
br label %bb.5
553+
554+
bb.5:
555+
%phi2 = phi <10 x i8> [ %phi0, %bb.4 ], [ %phi1, %bb.3 ]
556+
br i1 %cmp, label %bb.7, label %bb.6
557+
558+
bb.6: ; preds = %bb.5
559+
br label %bb.7
560+
561+
bb.7:
562+
%phi3 = phi <10 x i8> [ %invec, %bb.6 ], [ %phi2, %bb.5 ]
563+
br label %bb.8
564+
565+
bb.8:
566+
br label %bb.1
567+
}
568+
569+
570+
352571
declare i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)