Skip to content

Commit 44afd5f

Browse files
committed
- Address reviewer comments.
- Add findImplicitExecSrc helper. - Use helper to ignore V_SET_INACTIVE instructions during WQM/WWM processing. This allows other passes to emit V_SET_INACTIVE for already known WWM sections. This supports #105822. - Add test for above.
1 parent 58dbdda commit 44afd5f

File tree

4 files changed

+74
-38
lines changed

4 files changed

+74
-38
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2098,8 +2098,20 @@ unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
20982098
}
20992099
}
21002100

2101+
/// Return the first implicit use operand of \p MI whose register is not
/// EXEC, EXEC_LO or SCC.
///
/// Implicit defs are skipped entirely. If no implicit operand qualifies, a
/// default-constructed Register is returned, which callers test with `!`.
Register SIInstrInfo::findImplicitExecSrc(const MachineInstr &MI) {
2102+
for (auto &Op : MI.implicit_operands()) {
2103+
// Only implicit uses are candidates; defs are never returned.
if (Op.isDef())
2104+
continue;
2105+
Register OpReg = Op.getReg();
2106+
// Uses of EXEC, EXEC_LO and SCC are passed over.
if (OpReg == AMDGPU::EXEC || OpReg == AMDGPU::EXEC_LO ||
2107+
OpReg == AMDGPU::SCC)
2108+
continue;
2109+
return OpReg;
2110+
}
2111+
// No qualifying implicit use was found.
return Register();
2112+
}
2113+
21012114
bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2102-
const SIRegisterInfo *TRI = ST.getRegisterInfo();
21032115
MachineBasicBlock &MBB = *MI.getParent();
21042116
DebugLoc DL = MBB.findDebugLoc(MI);
21052117
switch (MI.getOpcode()) {
@@ -2286,21 +2298,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
22862298
MachineOperand &InactiveSrc = MI.getOperand(2);
22872299

22882300
// Find implicit register defining lanes active outside WWM.
2301+
Register ExecSrcReg = findImplicitExecSrc(MI);
2302+
assert(ExecSrcReg && "V_SET_INACTIVE must be in known WWM region");
22892303
// Note: default here is set to ExecReg so that functional MIR is still
22902304
// generated if the implicit use is not found and assertions are disabled.
2291-
Register ExecSrcReg = ExecReg;
2292-
for (auto &Op : MI.implicit_operands()) {
2293-
if (Op.isDef() || !Op.isReg())
2294-
continue;
2295-
Register OpReg = Op.getReg();
2296-
if (OpReg == AMDGPU::EXEC || OpReg == AMDGPU::EXEC_LO ||
2297-
OpReg == AMDGPU::SCC)
2298-
continue;
2299-
ExecSrcReg = OpReg;
2300-
break;
2301-
}
2302-
assert(ExecSrcReg != ExecReg &&
2303-
"V_SET_INACTIVE must be in known WWM region");
2305+
if (!ExecSrcReg)
2306+
ExecSrcReg = ExecReg;
23042307

23052308
// Ideally in WWM this operation is lowered to V_CNDMASK; however,
23062309
// constant bus constraints and the presence of literal constants
@@ -2329,20 +2332,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
23292332
(usesConstantBus(MRI, ActiveSrc, Desc.operands()[Src1Idx]) ? 1 : 0) +
23302333
(usesConstantBus(MRI, InactiveSrc, Desc.operands()[Src0Idx]) ? 1 : 0);
23312334
int LiteralConstants =
2332-
(ActiveSrc.isImm() && !isInlineConstant(ActiveImm) ? 1 : 0) +
2333-
(InactiveSrc.isImm() && !isInlineConstant(InactiveImm) ? 1 : 0);
2335+
((ActiveSrc.isReg() ||
2336+
(ActiveSrc.isImm() && isInlineConstant(ActiveImm)))
2337+
? 0
2338+
: 1) +
2339+
((InactiveSrc.isReg() ||
2340+
(InactiveSrc.isImm() && isInlineConstant(InactiveImm)))
2341+
? 0
2342+
: 1);
23342343

23352344
bool UseVCndMask =
23362345
ConstantBusUses <= ConstantBusLimit && LiteralConstants <= LiteralLimit;
23372346
if (VMov64 && UseVCndMask) {
23382347
// Decomposition must not introduce new literals.
23392348
UseVCndMask &=
23402349
ActiveSrc.isReg() ||
2341-
(isInlineConstant(ActiveImmLo) && isInlineConstant(ActiveImmLo)) ||
2350+
(isInlineConstant(ActiveImmLo) && isInlineConstant(ActiveImmHi)) ||
23422351
(!isInlineConstant(ActiveImm));
23432352
UseVCndMask &= InactiveSrc.isReg() ||
23442353
(isInlineConstant(InactiveImmLo) &&
2345-
isInlineConstant(InactiveImmLo)) ||
2354+
isInlineConstant(InactiveImmHi)) ||
23462355
(!isInlineConstant(InactiveImm));
23472356
}
23482357

@@ -2352,34 +2361,34 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
23522361
ActiveSrc.isReg()
23532362
? MachineOperand::CreateReg(
23542363
RI.getSubReg(ActiveSrc.getReg(), AMDGPU::sub0), false,
2355-
/*isImp=*/false, /*isKill*/ false)
2364+
/*isImp=*/false, /*isKill=*/false)
23562365
: MachineOperand::CreateImm(ActiveImmLo.getSExtValue());
23572366
MachineOperand ActiveHi =
23582367
ActiveSrc.isReg()
23592368
? MachineOperand::CreateReg(
23602369
RI.getSubReg(ActiveSrc.getReg(), AMDGPU::sub1), false,
2361-
/*isImp=*/false, /*isKill*/ ActiveSrc.isKill())
2370+
/*isImp=*/false, /*isKill=*/ActiveSrc.isKill())
23622371
: MachineOperand::CreateImm(ActiveImmHi.getSExtValue());
23632372
MachineOperand InactiveLo =
23642373
InactiveSrc.isReg()
23652374
? MachineOperand::CreateReg(
23662375
RI.getSubReg(InactiveSrc.getReg(), AMDGPU::sub0), false,
2367-
/*isImp=*/false, /*isKill*/ false)
2376+
/*isImp=*/false, /*isKill=*/false)
23682377
: MachineOperand::CreateImm(InactiveImmLo.getSExtValue());
23692378
MachineOperand InactiveHi =
23702379
InactiveSrc.isReg()
23712380
? MachineOperand::CreateReg(
23722381
RI.getSubReg(InactiveSrc.getReg(), AMDGPU::sub1), false,
2373-
/*isImp=*/false, /*isKill*/ InactiveSrc.isKill())
2382+
/*isImp=*/false, /*isKill=*/InactiveSrc.isKill())
23742383
: MachineOperand::CreateImm(InactiveImmHi.getSExtValue());
2375-
BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DstReg, AMDGPU::sub0))
2384+
BuildMI(MBB, MI, DL, Desc, RI.getSubReg(DstReg, AMDGPU::sub0))
23762385
.addImm(0)
23772386
.add(InactiveLo)
23782387
.addImm(0)
23792388
.add(ActiveLo)
23802389
.addReg(ExecSrcReg)
23812390
.addReg(DstReg, RegState::ImplicitDefine);
2382-
BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DstReg, AMDGPU::sub1))
2391+
BuildMI(MBB, MI, DL, Desc, RI.getSubReg(DstReg, AMDGPU::sub1))
23832392
.addImm(0)
23842393
.add(InactiveHi)
23852394
.addImm(0)
@@ -2388,7 +2397,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
23882397
.addReg(DstReg, RegState::ImplicitDefine);
23892398
} else if (UseVCndMask) {
23902399
// Single V_CNDMASK_B32
2391-
BuildMI(MBB, MI, DL, get(Opcode), DstReg)
2400+
BuildMI(MBB, MI, DL, Desc, DstReg)
23922401
.addImm(0)
23932402
.add(InactiveSrc)
23942403
.addImm(0)
@@ -2406,9 +2415,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
24062415
// Set exec mask to inactive lanes,
24072416
// but only if active lanes would be overwritten.
24082417
if (DstIsActive) {
2409-
MachineInstr *ExecMI =
2410-
BuildMI(MBB, MI, DL, get(NotOpc), ExecReg).addReg(ExecSrcReg);
2411-
ExecMI->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
2418+
BuildMI(MBB, MI, DL, get(NotOpc), ExecReg)
2419+
.addReg(ExecSrcReg)
2420+
.setOperandDead(3); // Dead scc
24122421
}
24132422
// Copy inactive lanes
24142423
MachineInstr *VMov =

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,6 +1437,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
14371437
// This is used if an operand is a 32 bit register but needs to be aligned
14381438
// regardless.
14391439
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
1440+
1441+
static Register findImplicitExecSrc(const MachineInstr &MI);
14401442
};
14411443

14421444
/// \brief Returns true if a reg:subreg pair P has a TRC class

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -559,18 +559,24 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
559559
GlobalFlags |= StateStrictWQM;
560560
} else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
561561
Opcode == AMDGPU::V_SET_INACTIVE_B64) {
562-
// Disable strict states; StrictWQM will be added as required later.
563-
III.Disabled = StateStrict;
564-
MachineOperand &Inactive = MI.getOperand(2);
565-
if (Inactive.isReg()) {
566-
if (Inactive.isUndef()) {
567-
LowerToCopyInstrs.insert(&MI);
568-
} else {
569-
markOperand(MI, Inactive, StateStrictWWM, Worklist);
562+
// Ignore V_SET_INACTIVE instructions which already have an exec src register.
563+
// These are generated by an earlier pass which has separately ensured
564+
// WWM and provided a mask of inactive lanes.
565+
Register ExecSrc = TII->findImplicitExecSrc(MI);
566+
if (!ExecSrc) {
567+
// Disable strict states; StrictWQM will be added as required later.
568+
III.Disabled = StateStrict;
569+
MachineOperand &Inactive = MI.getOperand(2);
570+
if (Inactive.isReg()) {
571+
if (Inactive.isUndef()) {
572+
LowerToCopyInstrs.insert(&MI);
573+
} else {
574+
markOperand(MI, Inactive, StateStrictWWM, Worklist);
575+
}
570576
}
577+
SetInactiveInstrs.push_back(&MI);
578+
BBI.NeedsLowering = true;
571579
}
572-
SetInactiveInstrs.push_back(&MI);
573-
BBI.NeedsLowering = true;
574580
} else if (TII->isDisableWQM(MI)) {
575581
BBI.Needs |= StateExact;
576582
if (!(BBI.InNeeds & StateExact)) {

llvm/test/CodeGen/AMDGPU/wqm.mir

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@
4040
define amdgpu_vs void @no_wqm_in_vs() {
4141
ret void
4242
}
43+
define amdgpu_ps void @preloaded_set_inactive() {
44+
ret void
45+
}
4346
...
4447
---
4548

@@ -443,3 +446,19 @@ body: |
443446
444447
%4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
445448
...
449+
450+
---
451+
# Preserve V_SET_INACTIVE with exec mask already specified
452+
#CHECK-LABEL: name: preloaded_set_inactive
453+
#CHECK: V_SET_INACTIVE_B32
454+
name: preloaded_set_inactive
455+
tracksRegLiveness: true
456+
body: |
457+
bb.0:
458+
liveins: $vgpr1, $vgpr2
459+
460+
%0:vgpr_32 = COPY $vgpr1
461+
%1:vgpr_32 = COPY $vgpr2
462+
%mask:sreg_64 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
463+
%value:vgpr_32 = V_SET_INACTIVE_B32 %0:vgpr_32, %1:vgpr_32, implicit $exec, implicit-def $scc, implicit %mask:sreg_64
464+
...

0 commit comments

Comments
 (0)