@@ -7232,43 +7232,50 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
 // This is mainly caused by 16-bit SALU and 16-bit VALU instructions using
 // registers of different sizes. The operand sizes need to be legalized during
 // the VGPR lowering chain. This can be removed once sgpr16 is in place.
-void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
+void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI, unsigned OpIdx,
                                           MachineRegisterInfo &MRI) const {
   if (!ST.useRealTrue16Insts())
     return;
 
   unsigned Opcode = MI.getOpcode();
   MachineBasicBlock *MBB = MI.getParent();
-
   // legalize operands and check for size mismatch
-  for (MachineOperand &Op : MI.explicit_operands()) {
-    unsigned OpIdx = Op.getOperandNo();
-    if (!OpIdx)
-      continue;
-    if (Op.isReg() && Op.getReg().isVirtual()) {
-      const TargetRegisterClass *DefRC = MRI.getRegClass(Op.getReg());
-      if (!RI.isVGPRClass(DefRC))
-        continue;
-      unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
-      const TargetRegisterClass *UseRC = RI.getRegClass(RCID);
-      if (RI.getMatchingSuperRegClass(DefRC, UseRC, AMDGPU::lo16)) {
-        Op.setSubReg(AMDGPU::lo16);
-      } else if (RI.getMatchingSuperRegClass(UseRC, DefRC, AMDGPU::lo16)) {
-        const DebugLoc &DL = MI.getDebugLoc();
-        Register NewDstReg =
-            MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-        Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
-        BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
-        BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
-            .addReg(Op.getReg())
-            .addImm(AMDGPU::lo16)
-            .addReg(Undef)
-            .addImm(AMDGPU::hi16);
-        Op.setReg(NewDstReg);
-      }
-    }
+  if (!OpIdx || OpIdx >= MI.getNumExplicitOperands())
+    return;
+
+  MachineOperand &Op = MI.getOperand(OpIdx);
+  if (!Op.isReg() || !Op.getReg().isVirtual())
+    return;
+
+  const TargetRegisterClass *CurrRC = MRI.getRegClass(Op.getReg());
+  if (!RI.isVGPRClass(CurrRC))
+    return;
+
+  if (OpIdx >= get(Opcode).getNumOperands())
+    return;
+
+  unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
+  const TargetRegisterClass *ExpectedRC = RI.getRegClass(RCID);
+  if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
+    Op.setSubReg(AMDGPU::lo16);
+  } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
+    const DebugLoc &DL = MI.getDebugLoc();
+    Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
+    BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
+    BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
+        .addReg(Op.getReg())
+        .addImm(AMDGPU::lo16)
+        .addReg(Undef)
+        .addImm(AMDGPU::hi16);
+    Op.setReg(NewDstReg);
   }
 }
+void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
+                                          MachineRegisterInfo &MRI) const {
+  for (unsigned OpIdx = 1; OpIdx < MI.getNumExplicitOperands(); OpIdx++)
+    legalizeOperandsVALUt16(MI, OpIdx, MRI);
+}
 
 void SIInstrInfo::moveToVALU(SIInstrWorklist &Worklist,
                              MachineDominatorTree *MDT) const {
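For orientation, the two directions the new per-operand helper legalizes can be sketched in MIR. This is an illustrative sketch, not MIR from this patch's tests: the opcode names V_TRUE16_OP and V_32BIT_OP are placeholders for any true16 or 32-bit VALU instruction, and the virtual register numbers are made up.

    ; A 32-bit def feeding an operand that expects a 16-bit VGPR:
    ; the use is narrowed in place with a lo16 subregister index.
    %1:vgpr_16 = V_TRUE16_OP %0.lo16

    ; A 16-bit def feeding an operand that expects a 32-bit VGPR:
    ; the high half is padded with an IMPLICIT_DEF via REG_SEQUENCE.
    %3:vgpr_16 = IMPLICIT_DEF
    %4:vgpr_32 = REG_SEQUENCE %2, %subreg.lo16, %3, %subreg.hi16
    %5:vgpr_32 = V_32BIT_OP %4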
@@ -7789,15 +7796,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
       return;
     }
 
-    // If this is a v2s copy src from 16bit to 32bit,
-    // replace vgpr copy to reg_sequence
+    // If this is a v2s copy between a 16-bit and a 32-bit register,
+    // replace the vgpr copy with a reg_sequence/extract_subreg.
     // This can be removed after we have sgpr16 in place
     if (ST.useRealTrue16Insts() && Inst.isCopy() &&
         Inst.getOperand(1).getReg().isVirtual() &&
         RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
       const TargetRegisterClass *SrcRegRC = getOpRegClass(Inst, 1);
-      if (16 == RI.getRegSizeInBits(*SrcRegRC) &&
-          32 == RI.getRegSizeInBits(*NewDstRC)) {
+      if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
         Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
         Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
         BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
@@ -7810,18 +7816,13 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
             .addImm(AMDGPU::hi16);
         Inst.eraseFromParent();
         MRI.replaceRegWith(DstReg, NewDstReg);
-        // legalize useMI with mismatched size
-        for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
-                                               E = MRI.use_end();
-             I != E; ++I) {
-          MachineInstr &UseMI = *I->getParent();
-          unsigned UseMIOpcode = UseMI.getOpcode();
-          if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
-              (16 ==
-               RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
-            I->setSubReg(AMDGPU::lo16);
-          }
-        }
+        addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
+        return;
+      } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
+                                             AMDGPU::lo16)) {
+        Inst.getOperand(1).setSubReg(AMDGPU::lo16);
+        Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
+        MRI.replaceRegWith(DstReg, NewDstReg);
         addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
         return;
       }
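The two copy shapes this branch now handles can likewise be sketched in MIR (illustrative register numbers, not taken from the patch's tests):

    ; A 16-bit source copied into a 32-bit destination is widened with an
    ; IMPLICIT_DEF high half and a REG_SEQUENCE:
    ;   before:  %1:vgpr_32 = COPY %0:vgpr_16
    ;   after:   %2:vgpr_16 = IMPLICIT_DEF
    ;            %1:vgpr_32 = REG_SEQUENCE %0, %subreg.lo16, %2, %subreg.hi16

    ; A 32-bit source copied into a 16-bit destination keeps the copy but
    ; reads only the low half of the source:
    ;   before:  %4:vgpr_16 = COPY %3:vgpr_32
    ;   after:   %4:vgpr_16 = COPY %3.lo16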
@@ -7916,23 +7917,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
     assert(NewDstRC);
     NewDstReg = MRI.createVirtualRegister(NewDstRC);
     MRI.replaceRegWith(DstReg, NewDstReg);
-
-    // Check useMI of NewInstr. If used by a true16 instruction,
-    // add a lo16 subreg access if size mismatched
-    // This can be removed after we have sgpr16 in place
-    if (ST.useRealTrue16Insts() && NewDstRC == &AMDGPU::VGPR_32RegClass) {
-      for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
-                                             E = MRI.use_end();
-           I != E; ++I) {
-        MachineInstr &UseMI = *I->getParent();
-        unsigned UseMIOpcode = UseMI.getOpcode();
-        if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
-            (16 ==
-             RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
-          I->setSubReg(AMDGPU::lo16);
-        }
-      }
-    }
   }
   fixImplicitOperands(*NewInstr);
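Note that this deletion does not drop the lo16 fixup: the equivalent per-use narrowing now happens through the legalizeOperandsVALUt16 call that the final hunk adds to addUsersToMoveToVALUWorklist, so each use is legalized at the moment it is re-queued for VALU lowering.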
@@ -8740,6 +8724,8 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
         ++I;
       } while (I != E && I->getParent() == &UseMI);
     } else {
+      legalizeOperandsVALUt16(UseMI, OpNo, MRI);
+
       ++I;
     }
   }
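With this hook in place, operand-size legalization rides along with the normal worklist traversal: whenever a rewritten def's register is replaced and its users are re-queued, each use operand passes through legalizeOperandsVALUt16, which inserts the lo16 subregister access or the REG_SEQUENCE padding sketched above as needed. This is what makes the two ad-hoc use-iterator fixup loops deleted in the earlier hunks redundant.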