Skip to content

Commit 0507796

Browse files
committed
[ARM] Speedups for CombineBaseUpdate.
This attempts to put limits onto CombineBaseUpdate for degenerate cases like 127477. The biggest change is to add a limit to the number of base updates to check in CombineBaseUpdate. 64 is hopefully plenty high enough for most runtime unrolled loops to generate postinc where they are beneficial. It also moves the check for isValidBaseUpdate later so that it only happens if we will generate a valid instruction. The 1024 limit to hasPredecessorHelper comes from the X86 backend, which uses the same limit. I haven't added a test case as it would need to be very big and my attempts at generating a smaller version did not show anything useful. Fixes #127477.
1 parent b673a59 commit 0507796

File tree

1 file changed

+36
-29
lines changed

1 file changed

+36
-29
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149149
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150150
cl::init(2));
151151

152+
cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
153+
"arm-max-base-updates-to-check", cl::Hidden,
154+
cl::desc("Maximum number of base-updates to check generating postindex."),
155+
cl::init(64));
156+
152157
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153158
constexpr MVT FlagsVT = MVT::i32;
154159

@@ -15865,6 +15870,21 @@ struct BaseUpdateUser {
1586515870
unsigned ConstInc;
1586615871
};
1586715872

15873+
static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
15874+
// Check that the add is independent of the load/store.
15875+
// Otherwise, folding it would create a cycle. Search through Addr
15876+
// as well, since the User may not be a direct user of Addr and
15877+
// only share a base pointer.
15878+
SmallPtrSet<const SDNode *, 32> Visited;
15879+
SmallVector<const SDNode *, 16> Worklist;
15880+
Worklist.push_back(N);
15881+
Worklist.push_back(User);
15882+
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, 1024) ||
15883+
SDNode::hasPredecessorHelper(User, Visited, Worklist, 1024))
15884+
return false;
15885+
return true;
15886+
}
15887+
1586815888
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1586915889
struct BaseUpdateUser &User,
1587015890
bool SimpleConstIncOnly,
@@ -16066,6 +16086,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1606616086
if (SimpleConstIncOnly && User.ConstInc != NumBytes)
1606716087
return false;
1606816088

16089+
if (!isValidBaseUpdate(N, User.N))
16090+
return false;
16091+
1606916092
// OK, we found an ADD we can fold into the base update.
1607016093
// Now, create a _UPD node, taking care of not breaking alignment.
1607116094

@@ -16214,21 +16237,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
1621416237
}
1621516238
}
1621616239

16217-
static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16218-
// Check that the add is independent of the load/store.
16219-
// Otherwise, folding it would create a cycle. Search through Addr
16220-
// as well, since the User may not be a direct user of Addr and
16221-
// only share a base pointer.
16222-
SmallPtrSet<const SDNode *, 32> Visited;
16223-
SmallVector<const SDNode *, 16> Worklist;
16224-
Worklist.push_back(N);
16225-
Worklist.push_back(User);
16226-
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16227-
SDNode::hasPredecessorHelper(User, Visited, Worklist))
16228-
return false;
16229-
return true;
16230-
}
16231-
1623216240
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
1623316241
/// NEON load/store intrinsics, and generic vector load/stores, to merge
1623416242
/// base address updates.
@@ -16242,6 +16250,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
1624216250
const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
1624316251
BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
1624416252

16253+
// Limit the number of possible base-updates we look at to prevent degenerate
16254+
// cases.
16255+
unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
16256+
1624516257
SDValue Addr = N->getOperand(AddrOpIdx);
1624616258

1624716259
SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16256,8 +16268,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
1625616268
unsigned ConstInc =
1625716269
getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
1625816270

16259-
if (ConstInc || User->getOpcode() == ISD::ADD)
16271+
if (ConstInc || User->getOpcode() == ISD::ADD) {
1626016272
BaseUpdates.push_back({User, Inc, ConstInc});
16273+
if (BaseUpdates.size() > MaxBaseUpdates)
16274+
break;
16275+
}
1626116276
}
1626216277

1626316278
// If the address is a constant pointer increment itself, find
@@ -16284,27 +16299,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
1628416299
unsigned NewConstInc = UserOffset - Offset;
1628516300
SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
1628616301
BaseUpdates.push_back({User, NewInc, NewConstInc});
16302+
if (BaseUpdates.size() > MaxBaseUpdates)
16303+
break;
1628716304
}
1628816305
}
1628916306

1629016307
// Try to fold the load/store with an update that matches memory
1629116308
// access size. This should work well for sequential loads.
16292-
//
16293-
// Filter out invalid updates as well.
1629416309
unsigned NumValidUpd = BaseUpdates.size();
16295-
for (unsigned I = 0; I < NumValidUpd;) {
16310+
for (unsigned I = 0; I < NumValidUpd; I++) {
1629616311
BaseUpdateUser &User = BaseUpdates[I];
16297-
if (!isValidBaseUpdate(N, User.N)) {
16298-
--NumValidUpd;
16299-
std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16300-
continue;
16301-
}
16302-
1630316312
if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
1630416313
return SDValue();
16305-
++I;
1630616314
}
16307-
BaseUpdates.resize(NumValidUpd);
1630816315

1630916316
// Try to fold with other users. Non-constant updates are considered
1631016317
// first, and constant updates are sorted to not break a sequence of
@@ -16360,8 +16367,8 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
1636016367
Visited.insert(Addr.getNode());
1636116368
Worklist.push_back(N);
1636216369
Worklist.push_back(User);
16363-
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16364-
SDNode::hasPredecessorHelper(User, Visited, Worklist))
16370+
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, 1024) ||
16371+
SDNode::hasPredecessorHelper(User, Visited, Worklist, 1024))
1636516372
continue;
1636616373

1636716374
// Find the new opcode for the updating load/store.

0 commit comments

Comments
 (0)