@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149
149
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150
150
cl::init(2));
151
151
152
+ cl::opt<unsigned> ArmMaxBaseUpdatesToCheck( // Cap read by CombineBaseUpdate: it stops collecting base-update candidates once this many are queued.
153
+ "arm-max-base-updates-to-check", cl::Hidden,
154
+ cl::desc("Maximum number of base-updates to check generating postindex."),
155
+ cl::init(64)); // Default cap of 64 candidates for each load/store being combined.
156
+
152
157
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153
158
constexpr MVT FlagsVT = MVT::i32;
154
159
@@ -15842,6 +15847,22 @@ struct BaseUpdateUser {
15842
15847
unsigned ConstInc;
15843
15848
};
15844
15849
15850
+ static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
15851
+ // Returns true only when neither N (the load/store) nor User (the candidate
15852
+ // add) is reported as a predecessor of the other; folding a dependent add
15853
+ // would create a cycle. Visited starts out empty, so the walk also goes
15854
+ // through Addr, covering a User that merely shares a base pointer with N.
15855
+ SmallPtrSet<const SDNode *, 32> Visited;
15856
+ SmallVector<const SDNode *, 16> Worklist;
15857
+ Worklist.push_back(N); // Seed the walk with both nodes; Visited/Worklist are shared by the two queries below.
15858
+ Worklist.push_back(User);
15859
+ const unsigned MaxSteps = 1024; // Search budget. NOTE(review): presumably the helper reports "predecessor" once the budget is spent, making the answer conservative - confirm.
15860
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
15861
+ SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
15862
+ return false; // A dependency was reported in either direction: this update must not be folded.
15863
+ return true;
15864
+ }
15865
+
15845
15866
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15846
15867
struct BaseUpdateUser &User,
15847
15868
bool SimpleConstIncOnly,
@@ -16043,6 +16064,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
16043
16064
if (SimpleConstIncOnly && User.ConstInc != NumBytes)
16044
16065
return false;
16045
16066
16067
+ if (!isValidBaseUpdate(N, User.N))
16068
+ return false;
16069
+
16046
16070
// OK, we found an ADD we can fold into the base update.
16047
16071
// Now, create a _UPD node, taking care of not breaking alignment.
16048
16072
@@ -16191,21 +16215,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16191
16215
}
16192
16216
}
16193
16217
16194
- static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16195
- // Check that the add is independent of the load/store.
16196
- // Otherwise, folding it would create a cycle. Search through Addr
16197
- // as well, since the User may not be a direct user of Addr and
16198
- // only share a base pointer.
16199
- SmallPtrSet<const SDNode *, 32> Visited;
16200
- SmallVector<const SDNode *, 16> Worklist;
16201
- Worklist.push_back(N);
16202
- Worklist.push_back(User);
16203
- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16204
- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16205
- return false;
16206
- return true;
16207
- }
16208
-
16209
16218
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16210
16219
/// NEON load/store intrinsics, and generic vector load/stores, to merge
16211
16220
/// base address updates.
@@ -16219,6 +16228,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
16219
16228
const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16220
16229
BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16221
16230
16231
+ // Limit the number of possible base-updates we look at to prevent degenerate
16232
+ // cases.
16233
+ unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
16234
+
16222
16235
SDValue Addr = N->getOperand(AddrOpIdx);
16223
16236
16224
16237
SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16233,8 +16246,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
16233
16246
unsigned ConstInc =
16234
16247
getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16235
16248
16236
- if (ConstInc || User->getOpcode() == ISD::ADD)
16249
+ if (ConstInc || User->getOpcode() == ISD::ADD) {
16237
16250
BaseUpdates.push_back({User, Inc, ConstInc});
16251
+ if (BaseUpdates.size() >= MaxBaseUpdates)
16252
+ break;
16253
+ }
16238
16254
}
16239
16255
16240
16256
// If the address is a constant pointer increment itself, find
@@ -16261,27 +16277,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
16261
16277
unsigned NewConstInc = UserOffset - Offset;
16262
16278
SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16263
16279
BaseUpdates.push_back({User, NewInc, NewConstInc});
16280
+ if (BaseUpdates.size() >= MaxBaseUpdates)
16281
+ break;
16264
16282
}
16265
16283
}
16266
16284
16267
16285
// Try to fold the load/store with an update that matches memory
16268
16286
// access size. This should work well for sequential loads.
16269
- //
16270
- // Filter out invalid updates as well.
16271
16287
unsigned NumValidUpd = BaseUpdates.size();
16272
- for (unsigned I = 0; I < NumValidUpd;) {
16288
+ for (unsigned I = 0; I < NumValidUpd; I++ ) {
16273
16289
BaseUpdateUser &User = BaseUpdates[I];
16274
- if (!isValidBaseUpdate(N, User.N)) {
16275
- --NumValidUpd;
16276
- std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16277
- continue;
16278
- }
16279
-
16280
16290
if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16281
16291
return SDValue();
16282
- ++I;
16283
16292
}
16284
- BaseUpdates.resize(NumValidUpd);
16285
16293
16286
16294
// Try to fold with other users. Non-constant updates are considered
16287
16295
// first, and constant updates are sorted to not break a sequence of
@@ -16337,8 +16345,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
16337
16345
Visited.insert(Addr.getNode());
16338
16346
Worklist.push_back(N);
16339
16347
Worklist.push_back(User);
16340
- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16341
- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16348
+ const unsigned MaxSteps = 1024;
16349
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
16350
+ SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
16342
16351
continue;
16343
16352
16344
16353
// Find the new opcode for the updating load/store.
0 commit comments