Skip to content

Commit d6d1dbf

Browse files
davemgreentstellar
authored andcommitted
[ARM] Speedups for CombineBaseUpdate. (llvm#129725)
This attempts to put limits onto CombineBaseUpdate for degenerate cases like llvm#127477. The biggest change is to add a limit to the number of base updates to check in CombineBaseUpdate. 64 is hopefully plenty high enough for most runtime unrolled loops to generate postinc where they are beneficial. It also moves the check for isValidBaseUpdate later so that it only happens if we will generate a valid instruction. The 1024 limit to hasPredecessorHelper comes from the X86 backend, which uses the same limit. I haven't added a test case as it would need to be very big and my attempts at generating a smaller version did not show anything useful. Fixes llvm#127477. (cherry picked from commit 86cf4ed)
1 parent 5ba1949 commit d6d1dbf

File tree

1 file changed

+38
-29
lines changed

1 file changed

+38
-29
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149149
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150150
cl::init(2));
151151

152+
cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
153+
"arm-max-base-updates-to-check", cl::Hidden,
154+
cl::desc("Maximum number of base-updates to check generating postindex."),
155+
cl::init(64));
156+
152157
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153158
constexpr MVT FlagsVT = MVT::i32;
154159

@@ -15842,6 +15847,22 @@ struct BaseUpdateUser {
1584215847
unsigned ConstInc;
1584315848
};
1584415849

15850+
static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
15851+
// Check that the add is independent of the load/store.
15852+
// Otherwise, folding it would create a cycle. Search through Addr
15853+
// as well, since the User may not be a direct user of Addr and
15854+
// only share a base pointer.
15855+
SmallPtrSet<const SDNode *, 32> Visited;
15856+
SmallVector<const SDNode *, 16> Worklist;
15857+
Worklist.push_back(N);
15858+
Worklist.push_back(User);
15859+
const unsigned MaxSteps = 1024;
15860+
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
15861+
SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
15862+
return false;
15863+
return true;
15864+
}
15865+
1584515866
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1584615867
struct BaseUpdateUser &User,
1584715868
bool SimpleConstIncOnly,
@@ -16043,6 +16064,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1604316064
if (SimpleConstIncOnly && User.ConstInc != NumBytes)
1604416065
return false;
1604516066

16067+
if (!isValidBaseUpdate(N, User.N))
16068+
return false;
16069+
1604616070
// OK, we found an ADD we can fold into the base update.
1604716071
// Now, create a _UPD node, taking care of not breaking alignment.
1604816072

@@ -16191,21 +16215,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
1619116215
}
1619216216
}
1619316217

16194-
static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16195-
// Check that the add is independent of the load/store.
16196-
// Otherwise, folding it would create a cycle. Search through Addr
16197-
// as well, since the User may not be a direct user of Addr and
16198-
// only share a base pointer.
16199-
SmallPtrSet<const SDNode *, 32> Visited;
16200-
SmallVector<const SDNode *, 16> Worklist;
16201-
Worklist.push_back(N);
16202-
Worklist.push_back(User);
16203-
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16204-
SDNode::hasPredecessorHelper(User, Visited, Worklist))
16205-
return false;
16206-
return true;
16207-
}
16208-
1620916218
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
1621016219
/// NEON load/store intrinsics, and generic vector load/stores, to merge
1621116220
/// base address updates.
@@ -16219,6 +16228,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
1621916228
const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
1622016229
BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
1622116230

16231+
// Limit the number of possible base-updates we look at to prevent degenerate
16232+
// cases.
16233+
unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
16234+
1622216235
SDValue Addr = N->getOperand(AddrOpIdx);
1622316236

1622416237
SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16233,8 +16246,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
1623316246
unsigned ConstInc =
1623416247
getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
1623516248

16236-
if (ConstInc || User->getOpcode() == ISD::ADD)
16249+
if (ConstInc || User->getOpcode() == ISD::ADD) {
1623716250
BaseUpdates.push_back({User, Inc, ConstInc});
16251+
if (BaseUpdates.size() >= MaxBaseUpdates)
16252+
break;
16253+
}
1623816254
}
1623916255

1624016256
// If the address is a constant pointer increment itself, find
@@ -16261,27 +16277,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
1626116277
unsigned NewConstInc = UserOffset - Offset;
1626216278
SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
1626316279
BaseUpdates.push_back({User, NewInc, NewConstInc});
16280+
if (BaseUpdates.size() >= MaxBaseUpdates)
16281+
break;
1626416282
}
1626516283
}
1626616284

1626716285
// Try to fold the load/store with an update that matches memory
1626816286
// access size. This should work well for sequential loads.
16269-
//
16270-
// Filter out invalid updates as well.
1627116287
unsigned NumValidUpd = BaseUpdates.size();
16272-
for (unsigned I = 0; I < NumValidUpd;) {
16288+
for (unsigned I = 0; I < NumValidUpd; I++) {
1627316289
BaseUpdateUser &User = BaseUpdates[I];
16274-
if (!isValidBaseUpdate(N, User.N)) {
16275-
--NumValidUpd;
16276-
std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16277-
continue;
16278-
}
16279-
1628016290
if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
1628116291
return SDValue();
16282-
++I;
1628316292
}
16284-
BaseUpdates.resize(NumValidUpd);
1628516293

1628616294
// Try to fold with other users. Non-constant updates are considered
1628716295
// first, and constant updates are sorted to not break a sequence of
@@ -16337,8 +16345,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
1633716345
Visited.insert(Addr.getNode());
1633816346
Worklist.push_back(N);
1633916347
Worklist.push_back(User);
16340-
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16341-
SDNode::hasPredecessorHelper(User, Visited, Worklist))
16348+
const unsigned MaxSteps = 1024;
16349+
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
16350+
SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
1634216351
continue;
1634316352

1634416353
// Find the new opcode for the updating load/store.

0 commit comments

Comments
 (0)