@@ -2098,8 +2098,20 @@ unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
2098
2098
}
2099
2099
}
2100
2100
2101
+ Register SIInstrInfo::findImplicitExecSrc (const MachineInstr &MI) {
2102
+ for (auto &Op : MI.implicit_operands ()) {
2103
+ if (Op.isDef ())
2104
+ continue ;
2105
+ Register OpReg = Op.getReg ();
2106
+ if (OpReg == AMDGPU::EXEC || OpReg == AMDGPU::EXEC_LO ||
2107
+ OpReg == AMDGPU::SCC)
2108
+ continue ;
2109
+ return OpReg;
2110
+ }
2111
+ return Register ();
2112
+ }
2113
+
2101
2114
bool SIInstrInfo::expandPostRAPseudo (MachineInstr &MI) const {
2102
- const SIRegisterInfo *TRI = ST.getRegisterInfo ();
2103
2115
MachineBasicBlock &MBB = *MI.getParent ();
2104
2116
DebugLoc DL = MBB.findDebugLoc (MI);
2105
2117
switch (MI.getOpcode ()) {
@@ -2286,21 +2298,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2286
2298
MachineOperand &InactiveSrc = MI.getOperand (2 );
2287
2299
2288
2300
// Find implicit register defining lanes active outside WWM.
2301
+ Register ExecSrcReg = findImplicitExecSrc (MI);
2302
+ assert (ExecSrcReg && " V_SET_INACTIVE must be in known WWM region" );
2289
2303
// Note: default here is set to ExecReg so that functional MIR is still
2290
2304
// generated if implicit def is not found and assertions are disabled.
2291
- Register ExecSrcReg = ExecReg;
2292
- for (auto &Op : MI.implicit_operands ()) {
2293
- if (Op.isDef () || !Op.isReg ())
2294
- continue ;
2295
- Register OpReg = Op.getReg ();
2296
- if (OpReg == AMDGPU::EXEC || OpReg == AMDGPU::EXEC_LO ||
2297
- OpReg == AMDGPU::SCC)
2298
- continue ;
2299
- ExecSrcReg = OpReg;
2300
- break ;
2301
- }
2302
- assert (ExecSrcReg != ExecReg &&
2303
- " V_SET_INACTIVE must be in known WWM region" );
2305
+ if (!ExecSrcReg)
2306
+ ExecSrcReg = ExecReg;
2304
2307
2305
2308
// Ideally in WWM this operation is lowered to V_CNDMASK; however,
2306
2309
// constant bus constraints and the presence of literal constants
@@ -2329,20 +2332,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2329
2332
(usesConstantBus (MRI, ActiveSrc, Desc.operands ()[Src1Idx]) ? 1 : 0 ) +
2330
2333
(usesConstantBus (MRI, InactiveSrc, Desc.operands ()[Src0Idx]) ? 1 : 0 );
2331
2334
int LiteralConstants =
2332
- (ActiveSrc.isImm () && !isInlineConstant (ActiveImm) ? 1 : 0 ) +
2333
- (InactiveSrc.isImm () && !isInlineConstant (InactiveImm) ? 1 : 0 );
2335
+ ((ActiveSrc.isReg () ||
2336
+ (ActiveSrc.isImm () && isInlineConstant (ActiveImm)))
2337
+ ? 0
2338
+ : 1 ) +
2339
+ ((InactiveSrc.isReg () ||
2340
+ (InactiveSrc.isImm () && isInlineConstant (InactiveImm)))
2341
+ ? 0
2342
+ : 1 );
2334
2343
2335
2344
bool UseVCndMask =
2336
2345
ConstantBusUses <= ConstantBusLimit && LiteralConstants <= LiteralLimit;
2337
2346
if (VMov64 && UseVCndMask) {
2338
2347
// Decomposition must not introduce new literals.
2339
2348
UseVCndMask &=
2340
2349
ActiveSrc.isReg () ||
2341
- (isInlineConstant (ActiveImmLo) && isInlineConstant (ActiveImmLo )) ||
2350
+ (isInlineConstant (ActiveImmLo) && isInlineConstant (ActiveImmHi )) ||
2342
2351
(!isInlineConstant (ActiveImm));
2343
2352
UseVCndMask &= InactiveSrc.isReg () ||
2344
2353
(isInlineConstant (InactiveImmLo) &&
2345
- isInlineConstant (InactiveImmLo )) ||
2354
+ isInlineConstant (InactiveImmHi )) ||
2346
2355
(!isInlineConstant (InactiveImm));
2347
2356
}
2348
2357
@@ -2352,34 +2361,34 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2352
2361
ActiveSrc.isReg ()
2353
2362
? MachineOperand::CreateReg (
2354
2363
RI.getSubReg (ActiveSrc.getReg (), AMDGPU::sub0), false ,
2355
- /* isImp=*/ false , /* isKill*/ false )
2364
+ /* isImp=*/ false , /* isKill= */ false )
2356
2365
: MachineOperand::CreateImm (ActiveImmLo.getSExtValue ());
2357
2366
MachineOperand ActiveHi =
2358
2367
ActiveSrc.isReg ()
2359
2368
? MachineOperand::CreateReg (
2360
2369
RI.getSubReg (ActiveSrc.getReg (), AMDGPU::sub1), false ,
2361
- /* isImp=*/ false , /* isKill*/ ActiveSrc.isKill ())
2370
+ /* isImp=*/ false , /* isKill= */ ActiveSrc.isKill ())
2362
2371
: MachineOperand::CreateImm (ActiveImmHi.getSExtValue ());
2363
2372
MachineOperand InactiveLo =
2364
2373
InactiveSrc.isReg ()
2365
2374
? MachineOperand::CreateReg (
2366
2375
RI.getSubReg (InactiveSrc.getReg (), AMDGPU::sub0), false ,
2367
- /* isImp=*/ false , /* isKill*/ false )
2376
+ /* isImp=*/ false , /* isKill= */ false )
2368
2377
: MachineOperand::CreateImm (InactiveImmLo.getSExtValue ());
2369
2378
MachineOperand InactiveHi =
2370
2379
InactiveSrc.isReg ()
2371
2380
? MachineOperand::CreateReg (
2372
2381
RI.getSubReg (InactiveSrc.getReg (), AMDGPU::sub1), false ,
2373
- /* isImp=*/ false , /* isKill*/ InactiveSrc.isKill ())
2382
+ /* isImp=*/ false , /* isKill= */ InactiveSrc.isKill ())
2374
2383
: MachineOperand::CreateImm (InactiveImmHi.getSExtValue ());
2375
- BuildMI (MBB, MI, DL, get (Opcode) , RI.getSubReg (DstReg, AMDGPU::sub0))
2384
+ BuildMI (MBB, MI, DL, Desc , RI.getSubReg (DstReg, AMDGPU::sub0))
2376
2385
.addImm (0 )
2377
2386
.add (InactiveLo)
2378
2387
.addImm (0 )
2379
2388
.add (ActiveLo)
2380
2389
.addReg (ExecSrcReg)
2381
2390
.addReg (DstReg, RegState::ImplicitDefine);
2382
- BuildMI (MBB, MI, DL, get (Opcode) , RI.getSubReg (DstReg, AMDGPU::sub1))
2391
+ BuildMI (MBB, MI, DL, Desc , RI.getSubReg (DstReg, AMDGPU::sub1))
2383
2392
.addImm (0 )
2384
2393
.add (InactiveHi)
2385
2394
.addImm (0 )
@@ -2388,7 +2397,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2388
2397
.addReg (DstReg, RegState::ImplicitDefine);
2389
2398
} else if (UseVCndMask) {
2390
2399
// Single V_CNDMASK_B32
2391
- BuildMI (MBB, MI, DL, get (Opcode) , DstReg)
2400
+ BuildMI (MBB, MI, DL, Desc , DstReg)
2392
2401
.addImm (0 )
2393
2402
.add (InactiveSrc)
2394
2403
.addImm (0 )
@@ -2406,9 +2415,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2406
2415
// Set exec mask to inactive lanes,
2407
2416
// but only if active lanes would be overwritten.
2408
2417
if (DstIsActive) {
2409
- MachineInstr *ExecMI =
2410
- BuildMI (MBB, MI, DL, get (NotOpc), ExecReg) .addReg (ExecSrcReg);
2411
- ExecMI-> addRegisterDead (AMDGPU::SCC, TRI ); // SCC is overwritten
2418
+ BuildMI (MBB, MI, DL, get (NotOpc), ExecReg)
2419
+ .addReg (ExecSrcReg)
2420
+ . setOperandDead ( 3 ); // Dead scc
2412
2421
}
2413
2422
// Copy inactive lanes
2414
2423
MachineInstr *VMov =
0 commit comments