@@ -159,11 +159,15 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
159
159
160
160
// TODO: Skip masking high bits if def is known boolean.
161
161
162
+ bool IsSGPR = TRI.isSGPRClass (SrcRC);
162
163
unsigned AndOpc =
163
- TRI. isSGPRClass (SrcRC) ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
164
- BuildMI (*BB, &I, DL, TII.get (AndOpc), MaskedReg)
164
+ IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
165
+ auto And = BuildMI (*BB, &I, DL, TII.get (AndOpc), MaskedReg)
165
166
.addImm (1 )
166
167
.addReg (SrcReg);
168
+ if (IsSGPR)
169
+ And.setOperandDead (3 ); // Dead scc
170
+
167
171
BuildMI (*BB, &I, DL, TII.get (AMDGPU::V_CMP_NE_U32_e64), DstReg)
168
172
.addImm (0 )
169
173
.addReg (MaskedReg);
@@ -323,7 +327,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
323
327
MachineInstr *Add =
324
328
BuildMI (*BB, &I, DL, TII.get (Opc), DstReg)
325
329
.add (I.getOperand (1 ))
326
- .add (I.getOperand (2 ));
330
+ .add (I.getOperand (2 ))
331
+ .setOperandDead (3 ); // Dead scc
327
332
I.eraseFromParent ();
328
333
return constrainSelectedInstRegOperands (*Add, TII, TRI, RBI);
329
334
}
@@ -370,7 +375,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
370
375
.add (Lo2);
371
376
BuildMI (*BB, &I, DL, TII.get (AMDGPU::S_ADDC_U32), DstHi)
372
377
.add (Hi1)
373
- .add (Hi2);
378
+ .add (Hi2)
379
+ .setOperandDead (3 ); // Dead scc
374
380
} else {
375
381
const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass ();
376
382
Register CarryReg = MRI->createVirtualRegister (CarryRC);
@@ -437,14 +443,18 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
437
443
unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
438
444
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
439
445
440
- BuildMI (*BB, &I, DL, TII.get (HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
446
+ auto CarryInst = BuildMI (*BB, &I, DL, TII.get (HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
441
447
.add (I.getOperand (2 ))
442
448
.add (I.getOperand (3 ));
443
- BuildMI (*BB, &I, DL, TII.get (AMDGPU::COPY), Dst1Reg)
444
- .addReg (AMDGPU::SCC);
445
449
446
- if (!MRI->getRegClassOrNull (Dst1Reg))
447
- MRI->setRegClass (Dst1Reg, &AMDGPU::SReg_32RegClass);
450
+ if (MRI->use_nodbg_empty (Dst1Reg)) {
451
+ CarryInst.setOperandDead (3 ); // Dead scc
452
+ } else {
453
+ BuildMI (*BB, &I, DL, TII.get (AMDGPU::COPY), Dst1Reg)
454
+ .addReg (AMDGPU::SCC);
455
+ if (!MRI->getRegClassOrNull (Dst1Reg))
456
+ MRI->setRegClass (Dst1Reg, &AMDGPU::SReg_32RegClass);
457
+ }
448
458
449
459
if (!RBI.constrainGenericRegister (Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
450
460
!RBI.constrainGenericRegister (Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
@@ -741,7 +751,8 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
741
751
// build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
742
752
auto MIB = BuildMI (*BB, &MI, DL, TII.get (AMDGPU::S_LSHR_B32), Dst)
743
753
.addReg (ShiftSrc0)
744
- .addImm (16 );
754
+ .addImm (16 )
755
+ .setOperandDead (3 ); // Dead scc
745
756
746
757
MI.eraseFromParent ();
747
758
return constrainSelectedInstRegOperands (*MIB, TII, TRI, RBI);
@@ -1630,7 +1641,8 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
1630
1641
Register M0Base = MRI->createVirtualRegister (&AMDGPU::SReg_32RegClass);
1631
1642
BuildMI (*MBB, &MI, DL, TII.get (AMDGPU::S_LSHL_B32), M0Base)
1632
1643
.addReg (BaseOffset)
1633
- .addImm (16 );
1644
+ .addImm (16 )
1645
+ .setOperandDead (3 ); // Dead scc
1634
1646
1635
1647
BuildMI (*MBB, &MI, DL, TII.get (AMDGPU::COPY), AMDGPU::M0)
1636
1648
.addReg (M0Base);
@@ -2195,7 +2207,8 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
2195
2207
} else {
2196
2208
BuildMI (*MBB, I, DL, TII.get (AMDGPU::S_LSHL_B32), TmpReg0)
2197
2209
.addReg (HiReg)
2198
- .addImm (16 );
2210
+ .addImm (16 )
2211
+ .setOperandDead (3 ); // Dead scc
2199
2212
}
2200
2213
2201
2214
unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
@@ -2204,12 +2217,17 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
2204
2217
2205
2218
BuildMI (*MBB, I, DL, TII.get (MovOpc), ImmReg)
2206
2219
.addImm (0xffff );
2207
- BuildMI (*MBB, I, DL, TII.get (AndOpc), TmpReg1)
2220
+ auto And = BuildMI (*MBB, I, DL, TII.get (AndOpc), TmpReg1)
2208
2221
.addReg (LoReg)
2209
2222
.addReg (ImmReg);
2210
- BuildMI (*MBB, I, DL, TII.get (OrOpc), DstReg)
2223
+ auto Or = BuildMI (*MBB, I, DL, TII.get (OrOpc), DstReg)
2211
2224
.addReg (TmpReg0)
2212
2225
.addReg (TmpReg1);
2226
+
2227
+ if (!IsVALU) {
2228
+ And.setOperandDead (3 ); // Dead scc
2229
+ Or.setOperandDead (3 ); // Dead scc
2230
+ }
2213
2231
}
2214
2232
2215
2233
I.eraseFromParent ();
@@ -2354,7 +2372,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
2354
2372
if (Signed) {
2355
2373
BuildMI (MBB, I, DL, TII.get (AMDGPU::S_ASHR_I32), HiReg)
2356
2374
.addReg (SrcReg, 0 , SubReg)
2357
- .addImm (31 );
2375
+ .addImm (31 )
2376
+ .setOperandDead (3 ); // Dead scc
2358
2377
} else {
2359
2378
BuildMI (MBB, I, DL, TII.get (AMDGPU::S_MOV_B32), HiReg)
2360
2379
.addImm (0 );
@@ -2398,7 +2417,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
2398
2417
if (!Signed && shouldUseAndMask (SrcSize, Mask)) {
2399
2418
BuildMI (MBB, I, DL, TII.get (AMDGPU::S_AND_B32), DstReg)
2400
2419
.addReg (SrcReg)
2401
- .addImm (Mask);
2420
+ .addImm (Mask)
2421
+ .setOperandDead (3 ); // Dead scc
2402
2422
} else {
2403
2423
BuildMI (MBB, I, DL, TII.get (BFE32), DstReg)
2404
2424
.addReg (SrcReg)
@@ -2532,7 +2552,8 @@ bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
2532
2552
unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
2533
2553
BuildMI (*BB, &MI, DL, TII.get (Opc), OpReg)
2534
2554
.addReg (HiReg)
2535
- .addReg (ConstReg);
2555
+ .addReg (ConstReg)
2556
+ .setOperandDead (3 ); // Dead scc
2536
2557
BuildMI (*BB, &MI, DL, TII.get (AMDGPU::REG_SEQUENCE), Dst)
2537
2558
.addReg (LoReg)
2538
2559
.addImm (AMDGPU::sub0)
@@ -2573,7 +2594,8 @@ bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
2573
2594
// TODO: Should this use S_BITSET0_*?
2574
2595
BuildMI (*BB, &MI, DL, TII.get (AMDGPU::S_AND_B32), OpReg)
2575
2596
.addReg (HiReg)
2576
- .addReg (ConstReg);
2597
+ .addReg (ConstReg)
2598
+ .setOperandDead (3 ); // Dead scc
2577
2599
BuildMI (*BB, &MI, DL, TII.get (AMDGPU::REG_SEQUENCE), Dst)
2578
2600
.addReg (LoReg)
2579
2601
.addImm (AMDGPU::sub0)
@@ -2731,7 +2753,8 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
2731
2753
Register TmpReg = MRI->createVirtualRegister (TRI.getBoolRC ());
2732
2754
BuildMI (*BB, &I, DL, TII.get (Opcode), TmpReg)
2733
2755
.addReg (CondReg)
2734
- .addReg (Exec);
2756
+ .addReg (Exec)
2757
+ .setOperandDead (3 ); // Dead scc
2735
2758
CondReg = TmpReg;
2736
2759
}
2737
2760
@@ -2794,7 +2817,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
2794
2817
!CanCopyLow32 && !CanCopyHi32) {
2795
2818
auto MIB = BuildMI (*BB, &I, DL, TII.get (AMDGPU::S_AND_B64), DstReg)
2796
2819
.addReg (SrcReg)
2797
- .addReg (MaskReg);
2820
+ .addReg (MaskReg)
2821
+ .setOperandDead (3 ); // Dead scc
2798
2822
I.eraseFromParent ();
2799
2823
return constrainSelectedInstRegOperands (*MIB, TII, TRI, RBI);
2800
2824
}
@@ -2817,9 +2841,12 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
2817
2841
assert (MaskTy.getSizeInBits () == 32 &&
2818
2842
" ptrmask should have been narrowed during legalize" );
2819
2843
2820
- BuildMI (*BB, &I, DL, TII.get (NewOpc), DstReg)
2844
+ auto NewOp = BuildMI (*BB, &I, DL, TII.get (NewOpc), DstReg)
2821
2845
.addReg (SrcReg)
2822
2846
.addReg (MaskReg);
2847
+
2848
+ if (!IsVGPR)
2849
+ NewOp.setOperandDead (3 ); // Dead scc
2823
2850
I.eraseFromParent ();
2824
2851
return true ;
2825
2852
}
@@ -3325,7 +3352,8 @@ bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
3325
3352
} else {
3326
3353
BuildMI (*MBB, MI, DL, TII.get (AMDGPU::S_LSHR_B32), DstReg)
3327
3354
.addReg (SrcReg)
3328
- .addImm (Subtarget->getWavefrontSizeLog2 ());
3355
+ .addImm (Subtarget->getWavefrontSizeLog2 ())
3356
+ .setOperandDead (3 ); // Dead scc
3329
3357
}
3330
3358
3331
3359
const TargetRegisterClass &RC =
@@ -4114,7 +4142,8 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
4114
4142
4115
4143
BuildMI (*BB, &I, DL, TII.get (AMDGPU::S_ADD_I32), SAddr)
4116
4144
.addFrameIndex (FI)
4117
- .addReg (RHSDef->Reg );
4145
+ .addReg (RHSDef->Reg )
4146
+ .setOperandDead (3 ); // Dead scc
4118
4147
}
4119
4148
}
4120
4149
0 commit comments