@@ -278,11 +278,10 @@ LLVM_DUMP_METHOD void SIWholeQuadMode::printInfo() {
278
278
279
279
for (const MachineInstr &MI : *BII.first ) {
280
280
auto III = Instructions.find (&MI);
281
- if (III == Instructions.end ())
282
- continue ;
283
-
284
- dbgs () << " " << MI << " Needs = " << PrintState (III->second .Needs )
285
- << " , OutNeeds = " << PrintState (III->second .OutNeeds ) << ' \n ' ;
281
+ if (III != Instructions.end ()) {
282
+ dbgs () << " " << MI << " Needs = " << PrintState (III->second .Needs )
283
+ << " , OutNeeds = " << PrintState (III->second .OutNeeds ) << ' \n ' ;
284
+ }
286
285
}
287
286
}
288
287
}
@@ -455,10 +454,8 @@ void SIWholeQuadMode::markOperand(const MachineInstr &MI,
455
454
for (MCRegUnit Unit : TRI->regunits (Reg.asMCReg ())) {
456
455
LiveRange &LR = LIS->getRegUnit (Unit);
457
456
const VNInfo *Value = LR.Query (LIS->getInstructionIndex (MI)).valueIn ();
458
- if (!Value)
459
- continue ;
460
-
461
- markDefs (MI, LR, Unit, AMDGPU::NoSubRegister, Flag, Worklist);
457
+ if (Value)
458
+ markDefs (MI, LR, Unit, AMDGPU::NoSubRegister, Flag, Worklist);
462
459
}
463
460
}
464
461
}
@@ -499,19 +496,16 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
499
496
500
497
if (TII->isWQM (Opcode)) {
501
498
// If LOD is not supported WQM is not needed.
502
- if (!ST->hasExtendedImageInsts ())
503
- continue ;
504
499
// Only generate implicit WQM if implicit derivatives are required.
505
500
// This avoids inserting unintended WQM if a shader type without
506
501
// implicit derivatives uses an image sampling instruction.
507
- if (!HasImplicitDerivatives)
508
- continue ;
509
- // Sampling instructions don't need to produce results for all pixels
510
- // in a quad, they just require all inputs of a quad to have been
511
- // computed for derivatives.
512
- markInstructionUses (MI, StateWQM, Worklist);
513
- GlobalFlags |= StateWQM;
514
- continue ;
502
+ if (ST->hasExtendedImageInsts () && HasImplicitDerivatives) {
503
+ // Sampling instructions don't need to produce results for all pixels
504
+ // in a quad, they just require all inputs of a quad to have been
505
+ // computed for derivatives.
506
+ markInstructionUses (MI, StateWQM, Worklist);
507
+ GlobalFlags |= StateWQM;
508
+ }
515
509
} else if (Opcode == AMDGPU::WQM) {
516
510
// The WQM intrinsic requires its output to have all the helper lanes
517
511
// correct, so we need it to be in WQM.
@@ -520,15 +514,13 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
520
514
} else if (Opcode == AMDGPU::SOFT_WQM) {
521
515
LowerToCopyInstrs.push_back (&MI);
522
516
SoftWQMInstrs.push_back (&MI);
523
- continue ;
524
517
} else if (Opcode == AMDGPU::STRICT_WWM) {
525
518
// The STRICT_WWM intrinsic doesn't make the same guarantee, and plus
526
519
// it needs to be executed in WQM or Exact so that its copy doesn't
527
520
// clobber inactive lanes.
528
521
markInstructionUses (MI, StateStrictWWM, Worklist);
529
522
GlobalFlags |= StateStrictWWM;
530
523
LowerToMovInstrs.push_back (&MI);
531
- continue ;
532
524
} else if (Opcode == AMDGPU::STRICT_WQM ||
533
525
TII->isDualSourceBlendEXP (MI)) {
534
526
// STRICT_WQM is similar to STRICT_WWM, but instead of enabling all
@@ -551,7 +543,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
551
543
GlobalFlags |= StateExact;
552
544
III.Disabled = StateWQM | StateStrict;
553
545
}
554
- continue ;
555
546
} else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
556
547
Opcode == AMDGPU::DS_PARAM_LOAD ||
557
548
Opcode == AMDGPU::LDS_DIRECT_LOAD ||
@@ -561,7 +552,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
561
552
InstrInfo &II = Instructions[&MI];
562
553
II.Needs |= StateStrictWQM;
563
554
GlobalFlags |= StateStrictWQM;
564
- continue ;
565
555
} else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
566
556
Opcode == AMDGPU::V_SET_INACTIVE_B64) {
567
557
III.Disabled = StateStrict;
@@ -574,7 +564,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
574
564
}
575
565
}
576
566
SetInactiveInstrs.push_back (&MI);
577
- continue ;
578
567
} else if (TII->isDisableWQM (MI)) {
579
568
BBI.Needs |= StateExact;
580
569
if (!(BBI.InNeeds & StateExact)) {
@@ -583,40 +572,33 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
583
572
}
584
573
GlobalFlags |= StateExact;
585
574
III.Disabled = StateWQM | StateStrict;
586
- continue ;
587
- } else {
588
- if (Opcode == AMDGPU::SI_PS_LIVE || Opcode == AMDGPU::SI_LIVE_MASK) {
589
- LiveMaskQueries.push_back (&MI);
590
- } else if (Opcode == AMDGPU::SI_KILL_I1_TERMINATOR ||
591
- Opcode == AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR ||
592
- Opcode == AMDGPU::SI_DEMOTE_I1) {
593
- KillInstrs.push_back (&MI);
594
- BBI.NeedsLowering = true ;
595
- } else if (WQMOutputs) {
596
- // The function is in machine SSA form, which means that physical
597
- // VGPRs correspond to shader inputs and outputs. Inputs are
598
- // only used, outputs are only defined.
599
- // FIXME: is this still valid?
600
- for (const MachineOperand &MO : MI.defs ()) {
601
- if (!MO.isReg ())
602
- continue ;
603
-
604
- Register Reg = MO.getReg ();
605
-
606
- if (!Reg.isVirtual () &&
607
- TRI->hasVectorRegisters (TRI->getPhysRegBaseClass (Reg))) {
608
- Flags = StateWQM;
609
- break ;
610
- }
575
+ } else if (Opcode == AMDGPU::SI_PS_LIVE ||
576
+ Opcode == AMDGPU::SI_LIVE_MASK) {
577
+ LiveMaskQueries.push_back (&MI);
578
+ } else if (Opcode == AMDGPU::SI_KILL_I1_TERMINATOR ||
579
+ Opcode == AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR ||
580
+ Opcode == AMDGPU::SI_DEMOTE_I1) {
581
+ KillInstrs.push_back (&MI);
582
+ BBI.NeedsLowering = true ;
583
+ } else if (WQMOutputs) {
584
+ // The function is in machine SSA form, which means that physical
585
+ // VGPRs correspond to shader inputs and outputs. Inputs are
586
+ // only used, outputs are only defined.
587
+ // FIXME: is this still valid?
588
+ for (const MachineOperand &MO : MI.defs ()) {
589
+ Register Reg = MO.getReg ();
590
+ if (Reg.isPhysical () &&
591
+ TRI->hasVectorRegisters (TRI->getPhysRegBaseClass (Reg))) {
592
+ Flags = StateWQM;
593
+ break ;
611
594
}
612
595
}
613
-
614
- if (!Flags)
615
- continue ;
616
596
}
617
597
618
- markInstruction (MI, Flags, Worklist);
619
- GlobalFlags |= Flags;
598
+ if (Flags) {
599
+ markInstruction (MI, Flags, Worklist);
600
+ GlobalFlags |= Flags;
601
+ }
620
602
}
621
603
}
622
604
@@ -1568,8 +1550,6 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
1568
1550
case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
1569
1551
SplitPoint = lowerKillF32 (*MBB, *MI);
1570
1552
break ;
1571
- default :
1572
- continue ;
1573
1553
}
1574
1554
if (SplitPoint)
1575
1555
splitBlock (MBB, SplitPoint);
0 commit comments