@@ -7235,24 +7235,44 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
   return DeferredList.contains(MI);
 }

-// 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
-// sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
-// subreg access properly. This can be removed after we have sgpr16 in place
-void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &Inst,
+// Legalize operands between 16-bit and 32-bit registers in the v2s copy
+// lowering (change sgpr to vgpr).
+// This is mainly caused by 16-bit SALU and 16-bit VALU instructions using
+// registers of different sizes, so operand sizes must be legalized along
+// the vgpr lowering chain. This can be removed after we have sgpr16 in place.
+void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
                                           MachineRegisterInfo &MRI) const {
-  unsigned Opcode = Inst.getOpcode();
-  if (!AMDGPU::isTrue16Inst(Opcode) || !ST.useRealTrue16Insts())
+  if (!ST.useRealTrue16Insts())
     return;

-  for (MachineOperand &Op : Inst.explicit_operands()) {
+  unsigned Opcode = MI.getOpcode();
+  MachineBasicBlock *MBB = MI.getParent();
+
+  // Legalize operands and check for size mismatches.
+  for (MachineOperand &Op : MI.explicit_operands()) {
     unsigned OpIdx = Op.getOperandNo();
     if (!OpIdx)
       continue;
-    if (Op.isReg() && RI.isVGPR(MRI, Op.getReg())) {
+    if (Op.isReg() && Op.getReg().isVirtual() && RI.isVGPR(MRI, Op.getReg())) {
       unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
-      const TargetRegisterClass *RC = RI.getRegClass(RCID);
-      if (RI.getRegSizeInBits(*RC) == 16) {
+      const TargetRegisterClass *ExpectedRC = RI.getRegClass(RCID);
+      const TargetRegisterClass *RC = MRI.getRegClass(Op.getReg());
+      if (RI.getRegSizeInBits(*RC) == 32 &&
+          RI.getRegSizeInBits(*ExpectedRC) == 16) {
         Op.setSubReg(AMDGPU::lo16);
+      } else if (RI.getRegSizeInBits(*RC) == 16 &&
+                 RI.getRegSizeInBits(*ExpectedRC) == 32) {
+        const DebugLoc &DL = MI.getDebugLoc();
+        Register NewDstReg =
+            MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+        Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
+        BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
+        BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
+            .addReg(Op.getReg())
+            .addImm(AMDGPU::lo16)
+            .addReg(Undef)
+            .addImm(AMDGPU::hi16);
+        Op.setReg(NewDstReg);
       }
     }
   }
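
To make the two legalization directions concrete, here is a rough MIR-level
sketch of their effect (illustrative only, not part of the patch; %op, %undef,
and %wide are invented names). When a 32-bit VGPR feeds an operand slot whose
descriptor expects 16 bits, the operand is narrowed in place, so a use of
%op:vgpr_32 becomes %op.lo16:vgpr_32. When a 16-bit VGPR feeds a slot that
expects 32 bits, a widened value is assembled just before MI, with an
undefined high half:

    %undef:vgpr_16 = IMPLICIT_DEF
    %wide:vgpr_32 = REG_SEQUENCE %op, %subreg.lo16, %undef, %subreg.hi16

and the mismatched operand is rewritten to read %wide. Leaving the high half
undefined is harmless here because true16 consumers read only the lo16
subregister.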
@@ -7793,8 +7813,19 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
         .add(Inst.getOperand(1))
         .add(MachineOperand::CreateImm(AMDGPU::lo16));
     Inst.eraseFromParent();
-
     MRI.replaceRegWith(DstReg, NewDstReg);
+    // Legalize any uses whose operand size no longer matches.
+    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+                                           E = MRI.use_end();
+         I != E; ++I) {
+      MachineInstr &UseMI = *I->getParent();
+      unsigned UseMIOpcode = UseMI.getOpcode();
+      if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
+          RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())) ==
+              16) {
+        I->setSubReg(AMDGPU::lo16);
+      }
+    }
     addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
     return;
   }
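
For context on the fixup loop above, a hedged sketch of the situation it
repairs (the opcode and register names are invented for illustration): after
replaceRegWith, a true16 user that previously read the old 16-bit destination
now names the 32-bit NewDstReg directly, e.g.

    %use:vgpr_16 = V_MOV_B16_t16_e64 0, %new:vgpr_32, 0, implicit $exec

The loop narrows such operands to the low half by setting the lo16 subreg:

    %use:vgpr_16 = V_MOV_B16_t16_e64 0, %new.lo16:vgpr_32, 0, implicit $exec

Only uses sitting in true16 instructions whose operand register class is
16 bits wide are rewritten; 32-bit users keep reading NewDstReg whole.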