@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149
149
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150
150
cl::init(2));
151
151
152
+ cl::opt<unsigned> ArmMaxBaseUpdatesToCheck( // hidden tuning knob; CombineBaseUpdate reads it as the cap on collected base-update candidates
153
+ "arm-max-base-updates-to-check", cl::Hidden,
154
+ cl::desc("Maximum number of base-updates to check generating postindex."),
155
+ cl::init(64)); // default cap: 64 candidates
156
+
152
157
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153
158
constexpr MVT FlagsVT = MVT::i32;
154
159
@@ -15865,6 +15870,22 @@ struct BaseUpdateUser {
15865
15870
unsigned ConstInc;
15866
15871
};
15867
15872
15873
+ static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
15874
+ // Return true when the add (User) is independent of the load/store (N),
15875
+ // i.e. folding it into N would not create a cycle. The search also walks
15876
+ // through Addr, since the User may not be a direct user of Addr and may
15877
+ // only share a base pointer with it.
15878
+ SmallPtrSet<const SDNode *, 32> Visited;
15879
+ SmallVector<const SDNode *, 16> Worklist;
15880
+ Worklist.push_back(N);
15881
+ Worklist.push_back(User);
15882
+ const unsigned MaxSteps = 1024; // step budget handed to both predecessor searches below
15883
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
15884
+ SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
15885
+ return false; // NOTE(review): presumably also taken when the MaxSteps budget runs out -- confirm against hasPredecessorHelper
15886
+ return true;
15887
+ }
15888
+
15868
15889
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15869
15890
struct BaseUpdateUser &User,
15870
15891
bool SimpleConstIncOnly,
@@ -16066,6 +16087,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
16066
16087
if (SimpleConstIncOnly && User.ConstInc != NumBytes)
16067
16088
return false;
16068
16089
16090
+ if (!isValidBaseUpdate(N, User.N))
16091
+ return false;
16092
+
16069
16093
// OK, we found an ADD we can fold into the base update.
16070
16094
// Now, create a _UPD node, taking care of not breaking alignment.
16071
16095
@@ -16214,21 +16238,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16214
16238
}
16215
16239
}
16216
16240
16217
- static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16218
- // Check that the add is independent of the load/store.
16219
- // Otherwise, folding it would create a cycle. Search through Addr
16220
- // as well, since the User may not be a direct user of Addr and
16221
- // only share a base pointer.
16222
- SmallPtrSet<const SDNode *, 32> Visited;
16223
- SmallVector<const SDNode *, 16> Worklist;
16224
- Worklist.push_back(N);
16225
- Worklist.push_back(User);
16226
- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16227
- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16228
- return false;
16229
- return true;
16230
- }
16231
-
16232
16241
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16233
16242
/// NEON load/store intrinsics, and generic vector load/stores, to merge
16234
16243
/// base address updates.
@@ -16242,6 +16251,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
16242
16251
const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16243
16252
BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16244
16253
16254
+ // Limit the number of possible base-updates we look at to prevent degenerate
16255
+ // cases.
16256
+ unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
16257
+
16245
16258
SDValue Addr = N->getOperand(AddrOpIdx);
16246
16259
16247
16260
SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16256,8 +16269,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
16256
16269
unsigned ConstInc =
16257
16270
getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16258
16271
16259
- if (ConstInc || User->getOpcode() == ISD::ADD)
16272
+ if (ConstInc || User->getOpcode() == ISD::ADD) {
16260
16273
BaseUpdates.push_back({User, Inc, ConstInc});
16274
+ if (BaseUpdates.size() >= MaxBaseUpdates)
16275
+ break;
16276
+ }
16261
16277
}
16262
16278
16263
16279
// If the address is a constant pointer increment itself, find
@@ -16284,27 +16300,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
16284
16300
unsigned NewConstInc = UserOffset - Offset;
16285
16301
SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16286
16302
BaseUpdates.push_back({User, NewInc, NewConstInc});
16303
+ if (BaseUpdates.size() >= MaxBaseUpdates)
16304
+ break;
16287
16305
}
16288
16306
}
16289
16307
16290
16308
// Try to fold the load/store with an update that matches memory
16291
16309
// access size. This should work well for sequential loads.
16292
- //
16293
- // Filter out invalid updates as well.
16294
16310
unsigned NumValidUpd = BaseUpdates.size();
16295
- for (unsigned I = 0; I < NumValidUpd;) {
16311
+ for (unsigned I = 0; I < NumValidUpd; I++ ) {
16296
16312
BaseUpdateUser &User = BaseUpdates[I];
16297
- if (!isValidBaseUpdate(N, User.N)) {
16298
- --NumValidUpd;
16299
- std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16300
- continue;
16301
- }
16302
-
16303
16313
if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16304
16314
return SDValue();
16305
- ++I;
16306
16315
}
16307
- BaseUpdates.resize(NumValidUpd);
16308
16316
16309
16317
// Try to fold with other users. Non-constant updates are considered
16310
16318
// first, and constant updates are sorted to not break a sequence of
@@ -16360,8 +16368,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
16360
16368
Visited.insert(Addr.getNode());
16361
16369
Worklist.push_back(N);
16362
16370
Worklist.push_back(User);
16363
- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16364
- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16371
+ const unsigned MaxSteps = 1024;
16372
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
16373
+ SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
16365
16374
continue;
16366
16375
16367
16376
// Find the new opcode for the updating load/store.
0 commit comments