@@ -945,42 +945,164 @@ void CombinerHelper::applySextInRegOfLoad(
   MI.eraseFromParent();
 }
 
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+  if (Ty.isVector())
+    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+                                Ty.getNumElements());
+  return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+/// Return true if 'MI' is a load or a store whose address operand can be
+/// folded into the load/store addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
+                                    MachineRegisterInfo &MRI) {
+  TargetLowering::AddrMode AM;
+  auto *MF = MI->getMF();
+  auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+  if (!Addr)
+    return false;
+
+  AM.HasBaseReg = true;
+  if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
+    AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+  else
+    AM.Scale = 1; // [reg +/- reg]
+
+  return TLI.isLegalAddressingMode(
+      MF->getDataLayout(), AM,
+      getTypeForLLT(MI->getMMO().getMemoryType(),
+                    MF->getFunction().getContext()),
+      MI->getMMO().getAddrSpace());
+}
+
+static unsigned getIndexedOpc(unsigned LdStOpc) {
+  switch (LdStOpc) {
+  case TargetOpcode::G_LOAD:
+    return TargetOpcode::G_INDEXED_LOAD;
+  case TargetOpcode::G_STORE:
+    return TargetOpcode::G_INDEXED_STORE;
+  case TargetOpcode::G_ZEXTLOAD:
+    return TargetOpcode::G_INDEXED_ZEXTLOAD;
+  case TargetOpcode::G_SEXTLOAD:
+    return TargetOpcode::G_INDEXED_SEXTLOAD;
+  default:
+    llvm_unreachable("Unexpected opcode");
+  }
+}
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+  // Check for legality.
+  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+  LLT Ty = MRI.getType(LdSt.getReg(0));
+  LLT MemTy = LdSt.getMMO().getMemoryType();
+  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+      {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+  unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+  SmallVector<LLT> OpTys;
+  if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+    OpTys = {PtrTy, Ty, Ty};
+  else
+    OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+  LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+  return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+    "post-index-use-threshold", cl::Hidden, cl::init(32),
+    cl::desc("Number of uses of a base pointer to check before it is no longer "
+             "considered for post-indexing."));
+
 bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
-                                            Register &Base, Register &Offset) {
-  auto &MF = *LdSt.getParent()->getParent();
-  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+                                            Register &Base, Register &Offset,
+                                            bool &RematOffset) {
+  // We're looking for the following pattern, for either load or store:
+  // %baseptr:_(p0) = ...
+  // G_STORE %val(s64), %baseptr(p0)
+  // %offset:_(s64) = G_CONSTANT i64 -256
+  // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
+  const auto &TLI = getTargetLowering();
+
+  Register Ptr = LdSt.getPointerReg();
+  // If the store is the only use, don't bother.
+  if (MRI.hasOneNonDBGUse(Ptr))
+    return false;
 
-  Base = LdSt.getPointerReg();
+  if (!isIndexedLoadStoreLegal(LdSt))
+    return false;
 
-  if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Base, MRI))
+  if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
     return false;
 
-  // FIXME: The following use traversal needs a bail out for pathological cases.
-  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
+  MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+  auto *PtrDef = MRI.getVRegDef(Ptr);
+
+  unsigned NumUsesChecked = 0;
+  for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+    if (++NumUsesChecked > PostIndexUseThreshold)
+      return false; // Try to avoid exploding compile time.
+
     auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
-    if (!PtrAdd)
+    // The use itself might be dead. This can happen during combines if DCE
+    // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+    if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
+      continue;
+
+    // Check that the user of this isn't the store, otherwise we'd be
+    // generating an indexed store defining its own use.
+    if (StoredValDef == &Use)
       continue;
 
     Offset = PtrAdd->getOffsetReg();
     if (!ForceLegalIndexing &&
-        !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ false, MRI))
+        !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+                             /*IsPre*/ false, MRI))
       continue;
 
     // Make sure the offset calculation is before the potentially indexed op.
     MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
-    if (!dominates(*OffsetDef, LdSt))
-      continue;
+    RematOffset = false;
+    if (!dominates(*OffsetDef, LdSt)) {
+      // If the offset is just a G_CONSTANT, however, we can always
+      // rematerialize it where we need it.
+      if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+        continue;
+      RematOffset = true;
+    }
 
-    // FIXME: check whether all uses of Base are load/store with foldable
-    // addressing modes. If so, using the normal addr-modes is better than
-    // forming an indexed one.
-    if (any_of(MRI.use_nodbg_instructions(PtrAdd->getReg(0)),
-               [&](MachineInstr &PtrAddUse) {
-                 return !dominates(LdSt, PtrAddUse);
-               }))
-      continue;
+    for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+      if (&BasePtrUse == PtrDef)
+        continue;
+
+      // If the user is a later load/store that can be post-indexed, then don't
+      // combine this one.
+      auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+      if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
+          dominates(LdSt, *BasePtrLdSt) &&
+          isIndexedLoadStoreLegal(*BasePtrLdSt))
+        return false;
+
+      // Now we're looking for the key G_PTR_ADD instruction, which contains
+      // the offset add that we want to fold.
+      if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+        Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+        for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+          // If the use is in a different block, then we may produce worse code
+          // due to the extra register pressure.
+          if (BaseUseUse.getParent() != LdSt.getParent())
+            return false;
+
+          if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
+            if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+              return false;
+        }
+        if (!dominates(LdSt, BasePtrUse))
+          return false; // All uses must be dominated by the load/store.
+      }
+    }
 
     Addr = PtrAdd->getReg(0);
+    Base = PtrAdd->getBaseReg();
     return true;
   }
 
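
For isIndexedLoadStoreLegal() above to ever return true, a target has to declare the G_INDEXED_* opcodes legal for the type and memory combinations it cares about. The snippet below is a minimal sketch of such a rule, assumed to live inside a target's LegalizerInfo constructor with `using namespace TargetOpcode;` in scope; the s64-only restriction is purely illustrative and not any in-tree target's actual policy.

  // Illustrative only: mark 64-bit indexed loads/stores legal so that the
  // LegalityQuery built from OpTys/MemDescrs above succeeds.
  getActionDefinitionsBuilder({G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,
                               G_INDEXED_ZEXTLOAD})
      .legalIf([=](const LegalityQuery &Query) {
        // For the load forms, OpTys is {Ty, PtrTy} (see isIndexedLoadStoreLegal).
        return Query.Types[1].isPointer() && Query.Types[0] == LLT::scalar(64);
      });

  getActionDefinitionsBuilder(G_INDEXED_STORE)
      .legalIf([=](const LegalityQuery &Query) {
        // For stores, OpTys is {PtrTy, Ty, Ty}.
        return Query.Types[0].isPointer() && Query.Types[1] == LLT::scalar(64);
      });
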
@@ -1001,6 +1123,9 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
       !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
     return false;
 
+  if (!isIndexedLoadStoreLegal(LdSt))
+    return false;
+
   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
     return false;
@@ -1027,16 +1152,14 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
   auto &LdSt = cast<GLoadStore>(MI);
 
-  // For now, no targets actually support these opcodes so don't waste time
-  // running these unless we're forced to for testing.
-  if (!ForceLegalIndexing)
+  if (LdSt.isAtomic())
     return false;
 
   MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                                           MatchInfo.Offset);
   if (!MatchInfo.IsPre &&
       !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
-                              MatchInfo.Offset))
+                              MatchInfo.Offset, MatchInfo.RematOffset))
     return false;
 
   return true;
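
As a usage sketch, the match/apply pair composes roughly as follows. The wrapper name here is hypothetical; in-tree targets typically wire these entry points up through a Combine.td rule rather than a hand-written driver.

  // Hypothetical driver, only to show how IndexedLoadStoreMatchInfo flows
  // between the match and apply steps (including the new RematOffset flag).
  static bool tryIndexedLoadStore(CombinerHelper &Helper, MachineInstr &MI) {
    IndexedLoadStoreMatchInfo MatchInfo;
    if (!Helper.matchCombineIndexedLoadStore(MI, MatchInfo))
      return false;
    Helper.applyCombineIndexedLoadStore(MI, MatchInfo);
    return true;
  }
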
@@ -1045,28 +1168,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
 void CombinerHelper::applyCombineIndexedLoadStore(
     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
-  MachineIRBuilder MIRBuilder(MI);
+  Builder.setInstrAndDebugLoc(MI);
   unsigned Opcode = MI.getOpcode();
   bool IsStore = Opcode == TargetOpcode::G_STORE;
-  unsigned NewOpcode;
-  switch (Opcode) {
-  case TargetOpcode::G_LOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
-    break;
-  case TargetOpcode::G_SEXTLOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
-    break;
-  case TargetOpcode::G_ZEXTLOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
-    break;
-  case TargetOpcode::G_STORE:
-    NewOpcode = TargetOpcode::G_INDEXED_STORE;
-    break;
-  default:
-    llvm_unreachable("Unknown load/store opcode");
+  unsigned NewOpcode = getIndexedOpc(Opcode);
+
+  // If the offset constant didn't happen to dominate the load/store, we can
+  // just clone it as needed.
+  if (MatchInfo.RematOffset) {
+    auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+    auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+                                        *OldCst->getOperand(1).getCImm());
+    MatchInfo.Offset = NewCst.getReg(0);
   }
 
-  auto MIB = MIRBuilder.buildInstr(NewOpcode);
+  auto MIB = Builder.buildInstr(NewOpcode);
   if (IsStore) {
     MIB.addDef(MatchInfo.Addr);
     MIB.addUse(MI.getOperand(0).getReg());
@@ -1245,13 +1361,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
   Observer.changedInstr(*BrCond);
 }
 
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
-  if (Ty.isVector())
-    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
-                                Ty.getNumElements());
-  return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
 bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
   MachineIRBuilder HelperBuilder(MI);
   GISelObserverWrapper DummyObserver;
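
This last hunk only relocates getTypeForLLT() so that it sits above canFoldInAddressingMode(), which now needs it. As a quick illustration of what that helper hands to isLegalAddressingMode(), here is a small sketch; it assumes an LLVMContext is available and only covers the scalar and fixed-vector cases the helper handles.

  // Sketch: the LLT -> IR Type mapping used when querying the addressing mode.
  LLVMContext Ctx;
  Type *ScalarTy = getTypeForLLT(LLT::scalar(64), Ctx);          // i64
  Type *VectorTy = getTypeForLLT(LLT::fixed_vector(4, 32), Ctx); // <4 x i32>
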