@@ -7228,6 +7228,29 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
7228
7228
return DeferredList.contains (MI);
7229
7229
}
7230
7230
7231
+ // 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
7232
+ // sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
7233
+ // subreg access properly. This can be removed after we have sgpr16 in place
7234
+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &Inst,
7235
+ MachineRegisterInfo &MRI) const {
7236
+ unsigned Opcode = Inst.getOpcode ();
7237
+ if (!AMDGPU::isTrue16Inst (Opcode) || !ST.useRealTrue16Insts ())
7238
+ return ;
7239
+
7240
+ for (MachineOperand &Op : Inst.explicit_operands ()) {
7241
+ unsigned OpIdx = Op.getOperandNo ();
7242
+ if (!OpIdx)
7243
+ continue ;
7244
+ if (Op.isReg () && RI.isVGPR (MRI, Op.getReg ())) {
7245
+ unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7246
+ const TargetRegisterClass *RC = RI.getRegClass (RCID);
7247
+ if (RI.getRegSizeInBits (*RC) == 16 ) {
7248
+ Op.setSubReg (AMDGPU::lo16);
7249
+ }
7250
+ }
7251
+ }
7252
+ }
7253
+
7231
7254
void SIInstrInfo::moveToVALU (SIInstrWorklist &Worklist,
7232
7255
MachineDominatorTree *MDT) const {
7233
7256
@@ -7613,6 +7636,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7613
7636
.add (Inst.getOperand (0 ))
7614
7637
.add (Inst.getOperand (1 ));
7615
7638
}
7639
+ legalizeOperandsVALUt16 (*NewInstr, MRI);
7616
7640
legalizeOperands (*NewInstr, MDT);
7617
7641
int SCCIdx = Inst.findRegisterDefOperandIdx (AMDGPU::SCC, /* TRI=*/ nullptr );
7618
7642
MachineOperand SCCOp = Inst.getOperand (SCCIdx);
@@ -7682,6 +7706,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7682
7706
.addImm (0 ) // omod
7683
7707
.addImm (0 ); // opsel0
7684
7708
MRI.replaceRegWith (Inst.getOperand (0 ).getReg (), NewDst);
7709
+ legalizeOperandsVALUt16 (*NewInstr, MRI);
7685
7710
legalizeOperands (*NewInstr, MDT);
7686
7711
addUsersToMoveToVALUWorklist (NewDst, MRI, Worklist);
7687
7712
Inst.eraseFromParent ();
@@ -7747,6 +7772,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7747
7772
7748
7773
// If this is a v2s copy src from vgpr16 to sgpr32,
7749
7774
// replace vgpr copy to subreg_to_reg
7775
+ // This can be removed after we have sgpr16 in place
7750
7776
if (ST.useRealTrue16Insts () && Inst.isCopy () &&
7751
7777
Inst.getOperand (1 ).getReg ().isVirtual () &&
7752
7778
RI.isVGPR (MRI, Inst.getOperand (1 ).getReg ())) {
@@ -7785,11 +7811,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7785
7811
NewInstr.addImm (0 );
7786
7812
if (AMDGPU::hasNamedOperand (NewOpcode, AMDGPU::OpName::src0)) {
7787
7813
MachineOperand Src = Inst.getOperand (1 );
7788
- if (AMDGPU::isTrue16Inst (NewOpcode) && ST.useRealTrue16Insts () &&
7789
- Src.isReg () && RI.isVGPR (MRI, Src.getReg ()))
7790
- NewInstr.addReg (Src.getReg (), 0 , AMDGPU::lo16);
7791
- else
7792
- NewInstr->addOperand (Src);
7814
+ NewInstr->addOperand (Src);
7793
7815
}
7794
7816
7795
7817
if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
@@ -7863,6 +7885,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7863
7885
7864
7886
// Check useMI of NewInstr. If used by a true16 instruction,
7865
7887
// add a lo16 subreg access if size mismatched
7888
+ // This can be removed after we have sgpr16 in place
7866
7889
if (ST.useRealTrue16Insts () && NewDstRC == &AMDGPU::VGPR_32RegClass) {
7867
7890
for (MachineRegisterInfo::use_iterator I = MRI.use_begin (NewDstReg),
7868
7891
E = MRI.use_end ();
@@ -7878,6 +7901,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
7878
7901
}
7879
7902
}
7880
7903
fixImplicitOperands (*NewInstr);
7904
+
7905
+ legalizeOperandsVALUt16 (*NewInstr, MRI);
7906
+
7881
7907
// Legalize the operands
7882
7908
legalizeOperands (*NewInstr, MDT);
7883
7909
if (NewDstReg)
0 commit comments