@@ -797,6 +797,23 @@ int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_CO_U32_e32: {
+    int OtherIdx = Idx == 1 ? 2 : 1;
+    const MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+    return OtherOp.isImm() ? OtherOp.getImm() : 0;
+  }
+  case AMDGPU::V_ADD_CO_U32_e64: {
+    int OtherIdx = Idx == 2 ? 3 : 2;
+    const MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+    return OtherOp.isImm() ? OtherOp.getImm() : 0;
+  }
+  default:
+    break;
+  }
+
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return 0;

@@ -809,7 +826,60 @@ int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
  return getScratchInstrOffset(MI);
}

+static bool isFIPlusImmOrVGPR(const SIRegisterInfo &TRI,
+                              const MachineInstr &MI) {
+  assert(MI.getDesc().isAdd());
+  const MachineOperand &Src0 = MI.getOperand(1);
+  const MachineOperand &Src1 = MI.getOperand(2);
+
+  if (Src0.isFI()) {
+    return Src1.isImm() || (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
+                                                       Src1.getReg()));
+  }
+
+  if (Src1.isFI()) {
+    return Src0.isImm() || (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
+                                                       Src0.getReg()));
+  }
+
+  return false;
+}
+
bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+  // TODO: Handle v_add_co_u32, v_or_b32, v_and_b32 and scalar opcodes.
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_ADD_U32_e32: {
+    // TODO: We could handle this but it requires work to avoid violating
+    // operand restrictions.
+    if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e32) < 2 &&
+        !isFIPlusImmOrVGPR(*this, *MI))
+      return false;
+    [[fallthrough]];
+  }
+  case AMDGPU::V_ADD_U32_e64:
+    // FIXME: This optimization is barely profitable with enableFlatScratch
+    // as-is.
+    //
+    // Much of the benefit with the MUBUF handling is that we avoid duplicating
+    // the shift of the frame register, which isn't needed with scratch.
+    //
+    // materializeFrameBaseRegister doesn't know the register classes of the
+    // uses, and unconditionally uses an s_add_i32, which will end up using a
+    // copy for the vector uses.
+    return !ST.enableFlatScratch();
+  case AMDGPU::V_ADD_CO_U32_e32:
+    if (ST.getConstantBusLimit(AMDGPU::V_ADD_CO_U32_e32) < 2 &&
+        !isFIPlusImmOrVGPR(*this, *MI))
+      return false;
+    // We can't deal with the case where the carry out has a use (though this
+    // should never happen).
+    return MI->getOperand(3).isDead();
+  case AMDGPU::V_ADD_CO_U32_e64:
+    // TODO: Should we check use_empty instead?
+    return MI->getOperand(1).isDead();
+  default:
+    break;
+  }
+
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;

@@ -860,6 +930,8 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
    .addFrameIndex(FrameIdx);

  if (ST.enableFlatScratch()) {
+    // FIXME: Mark scc as dead
+    // FIXME: Make sure scc isn't live in.
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
      .addReg(OffsetReg, RegState::Kill)
      .addReg(FIReg);
@@ -877,6 +949,86 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                                       int64_t Offset) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
+
+  switch (MI.getOpcode()) {
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_CO_U32_e32: {
+    MachineOperand *FIOp = &MI.getOperand(2);
+    MachineOperand *ImmOp = &MI.getOperand(1);
+    if (!FIOp->isFI())
+      std::swap(FIOp, ImmOp);
+
+    if (!ImmOp->isImm()) {
+      assert(Offset == 0);
+      FIOp->ChangeToRegister(BaseReg, false);
+      TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);
+      return;
+    }
+
+    int64_t TotalOffset = ImmOp->getImm() + Offset;
+    if (TotalOffset == 0) {
+      MI.setDesc(TII->get(AMDGPU::COPY));
+      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
+        MI.removeOperand(I);
+
+      MI.getOperand(1).ChangeToRegister(BaseReg, false);
+      return;
+    }
+
+    ImmOp->setImm(TotalOffset);
+
+    MachineBasicBlock *MBB = MI.getParent();
+    MachineFunction *MF = MBB->getParent();
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+
+    // FIXME: materializeFrameBaseRegister does not know the register class of
+    // the uses of the frame index, and assumes SGPR for enableFlatScratch. Emit
+    // a copy so we have a legal operand and hope the register coalescer can
+    // clean it up.
+    if (isSGPRReg(MRI, BaseReg)) {
+      Register BaseRegVGPR =
+          MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), BaseRegVGPR)
+          .addReg(BaseReg);
+      MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);
+    } else {
+      MI.getOperand(2).ChangeToRegister(BaseReg, false);
+    }
+    return;
+  }
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_CO_U32_e64: {
+    int Src0Idx = MI.getNumExplicitDefs();
+    MachineOperand *FIOp = &MI.getOperand(Src0Idx);
+    MachineOperand *ImmOp = &MI.getOperand(Src0Idx + 1);
+    if (!FIOp->isFI())
+      std::swap(FIOp, ImmOp);
+
+    if (!ImmOp->isImm()) {
+      FIOp->ChangeToRegister(BaseReg, false);
+      TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);
+      return;
+    }
+
+    int64_t TotalOffset = ImmOp->getImm() + Offset;
+    if (TotalOffset == 0) {
+      MI.setDesc(TII->get(AMDGPU::COPY));
+
+      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
+        MI.removeOperand(I);
+
+      MI.getOperand(1).ChangeToRegister(BaseReg, false);
+    } else {
+      FIOp->ChangeToRegister(BaseReg, false);
+      ImmOp->setImm(TotalOffset);
+    }
+
+    return;
+  }
+  default:
+    break;
+  }
+
  bool IsFlat = TII->isFLATScratch(MI);

#ifndef NDEBUG
@@ -925,6 +1077,18 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        Register BaseReg,
                                        int64_t Offset) const {
+
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_CO_U32_e32:
+    return true;
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_CO_U32_e64:
+    return ST.hasVOP3Literal() || AMDGPU::isInlinableIntLiteral(Offset);
+  default:
+    break;
+  }
+
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;
