@@ -724,24 +724,39 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
724
724
const DebugLoc &DL, MCRegister DestReg,
725
725
MCRegister SrcReg, bool KillSrc) const {
726
726
const TargetRegisterClass *RC = RI.getPhysRegBaseClass (DestReg);
727
+ unsigned Size = RI.getRegSizeInBits (*RC);
728
+ const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass (SrcReg);
729
+ unsigned SrcSize = RI.getRegSizeInBits (*SrcRC);
730
+
731
+ // The rest of copyPhysReg assumes Src and Dst are the same size.
732
+ // TODO-GFX11_16BIT If all true 16 bit instruction patterns are completed can
733
+ // we remove Fix16BitCopies and this code block?
734
+ if (Fix16BitCopies) {
735
+ if (((Size == 16 ) != (SrcSize == 16 ))) {
736
+ if (ST.hasTrue16BitInsts ()) {
737
+ // Non-VGPR Src and Dst will later be expanded back to 32 bits.
738
+ MCRegister &RegToFix = (Size == 32 ) ? DestReg : SrcReg;
739
+ MCRegister SubReg = RI.getSubReg (RegToFix, AMDGPU::lo16);
740
+ RegToFix = SubReg;
741
+ } else {
742
+ MCRegister &RegToFix = (Size == 16 ) ? DestReg : SrcReg;
743
+ MCRegister Super = RI.get32BitRegister (RegToFix);
744
+ assert (RI.getSubReg (Super, AMDGPU::lo16) == RegToFix ||
745
+ RI.getSubReg (Super, AMDGPU::hi16) == RegToFix);
746
+ RegToFix = Super;
747
+ }
727
748
728
- // FIXME: This is hack to resolve copies between 16 bit and 32 bit
729
- // registers until all patterns are fixed.
730
- if (Fix16BitCopies &&
731
- ((RI.getRegSizeInBits (*RC) == 16 ) ^
732
- (RI.getRegSizeInBits (*RI.getPhysRegBaseClass (SrcReg)) == 16 ))) {
733
- MCRegister &RegToFix = (RI.getRegSizeInBits (*RC) == 16 ) ? DestReg : SrcReg;
734
- MCRegister Super = RI.get32BitRegister (RegToFix);
735
- assert (RI.getSubReg (Super, AMDGPU::lo16) == RegToFix);
736
- RegToFix = Super;
737
-
738
- if (DestReg == SrcReg) {
739
- // Insert empty bundle since ExpandPostRA expects an instruction here.
740
- BuildMI (MBB, MI, DL, get (AMDGPU::BUNDLE));
741
- return ;
749
+ if (DestReg == SrcReg) {
750
+ // Identity copy. Insert empty bundle since ExpandPostRA expects an
751
+ // instruction here.
752
+ BuildMI (MBB, MI, DL, get (AMDGPU::BUNDLE));
753
+ return ;
754
+ }
755
+ RC = RI.getPhysRegBaseClass (DestReg);
756
+ Size = RI.getRegSizeInBits (*RC);
757
+ SrcRC = RI.getPhysRegBaseClass (SrcReg);
758
+ SrcSize = RI.getRegSizeInBits (*SrcRC);
742
759
}
743
-
744
- RC = RI.getPhysRegBaseClass (DestReg);
745
760
}
746
761
747
762
if (RC == &AMDGPU::VGPR_32RegClass) {
@@ -865,10 +880,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
865
880
return ;
866
881
}
867
882
868
- const unsigned Size = RI.getRegSizeInBits (*RC);
869
883
if (Size == 16 ) {
870
- assert (AMDGPU::VGPR_LO16RegClass.contains (SrcReg) ||
871
- AMDGPU::VGPR_HI16RegClass.contains (SrcReg) ||
884
+ assert (AMDGPU::VGPR_16RegClass.contains (SrcReg) ||
872
885
AMDGPU::SReg_LO16RegClass.contains (SrcReg) ||
873
886
AMDGPU::AGPR_LO16RegClass.contains (SrcReg));
874
887
@@ -906,6 +919,25 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
906
919
return ;
907
920
}
908
921
922
+ if (ST.hasTrue16BitInsts ()) {
923
+ if (IsSGPRSrc) {
924
+ assert (SrcLow);
925
+ SrcReg = NewSrcReg;
926
+ }
927
+ // Use the smaller instruction encoding if possible.
928
+ if (AMDGPU::VGPR_16_Lo128RegClass.contains (DestReg) &&
929
+ (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains (SrcReg))) {
930
+ BuildMI (MBB, MI, DL, get (AMDGPU::V_MOV_B16_t16_e32), DestReg)
931
+ .addReg (SrcReg);
932
+ } else {
933
+ BuildMI (MBB, MI, DL, get (AMDGPU::V_MOV_B16_t16_e64), DestReg)
934
+ .addImm (0 ) // src0_modifiers
935
+ .addReg (SrcReg)
936
+ .addImm (0 ); // op_sel
937
+ }
938
+ return ;
939
+ }
940
+
909
941
if (IsSGPRSrc && !ST.hasSDWAScalar ()) {
910
942
if (!DstLow || !SrcLow) {
911
943
reportIllegalCopy (this , MBB, MI, DL, DestReg, SrcReg, KillSrc,
@@ -932,7 +964,6 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
932
964
return ;
933
965
}
934
966
935
- const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass (SrcReg);
936
967
if (RC == RI.getVGPR64Class () && (SrcRC == RC || RI.isSGPRClass (SrcRC))) {
937
968
if (ST.hasMovB64 ()) {
938
969
BuildMI (MBB, MI, DL, get (AMDGPU::V_MOV_B64_e32), DestReg)
@@ -1288,7 +1319,11 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
1288
1319
1289
1320
if (RI.isAGPRClass (DstRC))
1290
1321
return AMDGPU::COPY;
1291
- if (RI.getRegSizeInBits (*DstRC) == 32 ) {
1322
+ if (RI.getRegSizeInBits (*DstRC) == 16 ) {
1323
+ // Assume hi bits are unneeded. Only _e64 true16 instructions are legal
1324
+ // before RA.
1325
+ return RI.isSGPRClass (DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1326
+ } else if (RI.getRegSizeInBits (*DstRC) == 32 ) {
1292
1327
return RI.isSGPRClass (DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1293
1328
} else if (RI.getRegSizeInBits (*DstRC) == 64 && RI.isSGPRClass (DstRC)) {
1294
1329
return AMDGPU::S_MOV_B64;
0 commit comments