@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
   cl::desc("Maximum interleave factor for MVE VLDn to generate."),
   cl::init(2));
 
+cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
+    "arm-max-base-updates-to-check", cl::Hidden,
+    cl::desc("Maximum number of base-updates to check generating postindex."),
+    cl::init(64));
+
 /// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
 constexpr MVT FlagsVT = MVT::i32;
 
@@ -15865,6 +15870,21 @@ struct BaseUpdateUser {
   unsigned ConstInc;
 };
 
+static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
+  // Check that the add is independent of the load/store.
+  // Otherwise, folding it would create a cycle. Search through Addr
+  // as well, since the User may not be a direct user of Addr and
+  // only share a base pointer.
+  SmallPtrSet<const SDNode *, 32> Visited;
+  SmallVector<const SDNode *, 16> Worklist;
+  Worklist.push_back(N);
+  Worklist.push_back(User);
+  if (SDNode::hasPredecessorHelper(N, Visited, Worklist, 1024) ||
+      SDNode::hasPredecessorHelper(User, Visited, Worklist, 1024))
+    return false;
+  return true;
+}
+
 static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
                                  struct BaseUpdateUser &User,
                                  bool SimpleConstIncOnly,
@@ -16066,6 +16086,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
   if (SimpleConstIncOnly && User.ConstInc != NumBytes)
     return false;
 
+  if (!isValidBaseUpdate(N, User.N))
+    return false;
+
   // OK, we found an ADD we can fold into the base update.
   // Now, create a _UPD node, taking care of not breaking alignment.
 
@@ -16214,21 +16237,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
   }
 }
 
-static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
-  // Check that the add is independent of the load/store.
-  // Otherwise, folding it would create a cycle. Search through Addr
-  // as well, since the User may not be a direct user of Addr and
-  // only share a base pointer.
-  SmallPtrSet<const SDNode *, 32> Visited;
-  SmallVector<const SDNode *, 16> Worklist;
-  Worklist.push_back(N);
-  Worklist.push_back(User);
-  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
-      SDNode::hasPredecessorHelper(User, Visited, Worklist))
-    return false;
-  return true;
-}
-
 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
 /// NEON load/store intrinsics, and generic vector load/stores, to merge
 /// base address updates.
@@ -16242,6 +16250,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
   const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
   BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
 
+  // Limit the number of possible base-updates we look at to prevent degenerate
+  // cases.
+  unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
+
   SDValue Addr = N->getOperand(AddrOpIdx);
 
   SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16256,8 +16268,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
     unsigned ConstInc =
         getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
 
-    if (ConstInc || User->getOpcode() == ISD::ADD)
+    if (ConstInc || User->getOpcode() == ISD::ADD) {
       BaseUpdates.push_back({User, Inc, ConstInc});
+      if (BaseUpdates.size() > MaxBaseUpdates)
+        break;
+    }
   }
 
   // If the address is a constant pointer increment itself, find
@@ -16284,27 +16299,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
       unsigned NewConstInc = UserOffset - Offset;
       SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
       BaseUpdates.push_back({User, NewInc, NewConstInc});
+      if (BaseUpdates.size() > MaxBaseUpdates)
+        break;
     }
   }
 
   // Try to fold the load/store with an update that matches memory
   // access size. This should work well for sequential loads.
-  //
-  // Filter out invalid updates as well.
   unsigned NumValidUpd = BaseUpdates.size();
-  for (unsigned I = 0; I < NumValidUpd;) {
+  for (unsigned I = 0; I < NumValidUpd; I++) {
     BaseUpdateUser &User = BaseUpdates[I];
-    if (!isValidBaseUpdate(N, User.N)) {
-      --NumValidUpd;
-      std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
-      continue;
-    }
-
     if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
       return SDValue();
-    ++I;
   }
-  BaseUpdates.resize(NumValidUpd);
 
   // Try to fold with other users. Non-constant updates are considered
   // first, and constant updates are sorted to not break a sequence of
@@ -16360,8 +16367,8 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
     Visited.insert(Addr.getNode());
     Worklist.push_back(N);
     Worklist.push_back(User);
-    if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
-        SDNode::hasPredecessorHelper(User, Visited, Worklist))
+    if (SDNode::hasPredecessorHelper(N, Visited, Worklist, 1024) ||
+        SDNode::hasPredecessorHelper(User, Visited, Worklist, 1024))
       continue;
 
     // Find the new opcode for the updating load/store.
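
Usage note: since ArmMaxBaseUpdatesToCheck is declared as a plain cl::opt (hidden from -help but still accepted on the command line), the new cap can be overridden when experimenting with the degenerate cases it guards against. A minimal sketch, assuming a standard llc build and a hypothetical IR reproducer named many-base-updates.ll:

    llc -mtriple=armv7a-none-eabi -arm-max-base-updates-to-check=16 many-base-updates.ll -o -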