@@ -111,10 +111,17 @@ static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
                                 cl::desc("Number limit for gluing ld/st of memcpy."),
                                 cl::Hidden, cl::init(0));
 
+static cl::opt<unsigned>
+    MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192),
+             cl::desc("DAG combiner limit number of steps when searching DAG "
+                      "for predecessor nodes"));
+
 static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
   LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
 }
 
+unsigned SelectionDAG::getHasPredecessorMaxSteps() { return MaxSteps; }
+
 //===----------------------------------------------------------------------===//
 //                              ConstantFPSDNode Class
 //===----------------------------------------------------------------------===//
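The has-predecessor-max-steps flag is an ordinary cl::opt, so it can be set directly on llc (or via -mllvm when driving clang), and the new static getHasPredecessorMaxSteps() accessor exposes the same bound through SelectionDAG so other SelectionDAG code can share it. As a sketch of the intended usage pattern (the names N and User are illustrative only, not part of this patch), a caller bounds a predecessor query like so:

    // Sketch only: is N a (transitive) operand, i.e. a predecessor, of User?
    // SDNode::hasPredecessorHelper() is the existing helper; the step limit
    // comes from the accessor added above.
    SmallPtrSet<const SDNode *, 16> Visited;
    SmallVector<const SDNode *, 8> Worklist;
    Worklist.push_back(User);
    bool NIsPredecessor = SDNode::hasPredecessorHelper(
        N, Visited, Worklist, SelectionDAG::getHasPredecessorMaxSteps());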
@@ -2474,6 +2481,51 @@ SDValue SelectionDAG::getPartialReduceAdd(SDLoc DL, EVT ReducedTy, SDValue Op1,
   return Subvectors[0];
 }
 
+/// Given a store node \p StoreNode, return true if it is safe to fold that node
+/// into \p FPNode, which expands to a library call with output pointers.
+static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
+                                                  SDNode *FPNode) {
+  SmallVector<const SDNode *, 8> Worklist;
+  SmallVector<const SDNode *, 8> DeferredNodes;
+  SmallPtrSet<const SDNode *, 16> Visited;
+
+  // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
+  for (SDValue Op : StoreNode->ops())
+    if (Op.getNode() != FPNode)
+      Worklist.push_back(Op.getNode());
+
+  unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
+  while (!Worklist.empty()) {
+    const SDNode *Node = Worklist.pop_back_val();
+    auto [_, Inserted] = Visited.insert(Node);
+    if (!Inserted)
+      continue;
+
+    if (MaxSteps > 0 && Visited.size() >= MaxSteps)
+      return false;
+
+    // Reached the FPNode (would result in a cycle).
+    // OR Reached CALLSEQ_START (would result in nested call sequences).
+    if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
+      return false;
+
+    if (Node->getOpcode() == ISD::CALLSEQ_END) {
+      // Defer looking into call sequences (so we can check we're outside one).
+      // We still need to look through these for the predecessor check.
+      DeferredNodes.push_back(Node);
+      continue;
+    }
+
+    for (SDValue Op : Node->ops())
+      Worklist.push_back(Op.getNode());
+  }
+
+  // True if we're outside a call sequence and don't have the FPNode as a
+  // predecessor. No cycles or nested call sequences possible.
+  return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
+                                       MaxSteps);
+}
+
 bool SelectionDAG::expandMultipleResultFPLibCall(
     RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results,
     std::optional<unsigned> CallRetResNo) {
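canFoldStoreIntoLibCallOutputPointers() above is a bounded reverse-reachability walk with one twist: CALLSEQ_END nodes are set aside during the first pass (while reaching FPNode or a CALLSEQ_START rejects the fold outright), and they are only looked through afterwards, when checking that FPNode is not a predecessor of the store at all. The following is a self-contained sketch of that shape on a toy graph; Node, Opcode, and canFoldStore here are illustrative stand-ins, not LLVM types:

    // Minimal sketch of the deferred-worklist walk on a toy node graph.
    #include <unordered_set>
    #include <vector>

    enum class Opcode { Generic, CallSeqStart, CallSeqEnd };

    struct Node {
      Opcode Op = Opcode::Generic;
      std::vector<const Node *> Operands; // Operands are the node's predecessors.
    };

    static bool canFoldStore(const Node *Store, const Node *FPNode,
                             unsigned MaxSteps = 8192) {
      std::vector<const Node *> Worklist, Deferred;
      std::unordered_set<const Node *> Visited;

      // Seed with the store's operands, skipping the use we want to fold.
      for (const Node *Op : Store->Operands)
        if (Op != FPNode)
          Worklist.push_back(Op);

      while (!Worklist.empty()) {
        const Node *N = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(N).second)
          continue;
        if (MaxSteps && Visited.size() >= MaxSteps)
          return false; // Give up conservatively on very large graphs.
        if (N == FPNode || N->Op == Opcode::CallSeqStart)
          return false; // Cycle, or the store sits inside a call sequence.
        if (N->Op == Opcode::CallSeqEnd) {
          Deferred.push_back(N); // Look through it only in the second pass.
          continue;
        }
        for (const Node *Op : N->Operands)
          Worklist.push_back(Op);
      }

      // Second pass: FPNode must not be reachable even through the deferred
      // call-sequence ends, i.e. it is not a predecessor of the store at all.
      for (const Node *D : Deferred)
        for (const Node *Op : D->Operands)
          Worklist.push_back(Op);
      while (!Worklist.empty()) {
        const Node *N = Worklist.back();
        Worklist.pop_back();
        if (N == FPNode)
          return false;
        if (!Visited.insert(N).second)
          continue;
        for (const Node *Op : N->Operands)
          Worklist.push_back(Op);
      }
      return true;
    }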
@@ -2502,26 +2554,35 @@ bool SelectionDAG::expandMultipleResultFPLibCall
 
   // Find users of the node that store the results (and share input chains). The
   // destination pointers can be used instead of creating stack allocations.
-  // FIXME: This should allow stores with the same chains (not just the entry
-  // chain), but there's a risk the store is within a (CALLSEQ_START,
-  // CALLSEQ_END) pair, which after this expansion will lead to nested call
-  // sequences.
-  SDValue InChain = getEntryNode();
+  SDValue StoresInChain;
   SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
   for (SDNode *User : Node->uses()) {
     if (!ISD::isNormalStore(User))
       continue;
     auto *ST = cast<StoreSDNode>(User);
     SDValue StoreValue = ST->getValue();
     unsigned ResNo = StoreValue.getResNo();
+    // Ensure the store corresponds to an output pointer.
+    if (CallRetResNo == ResNo)
+      continue;
+    // Ensure the store is to the default address space and not atomic or volatile.
+    if (!ST->isSimple() || ST->getAddressSpace() != 0)
+      continue;
+    // Ensure all store chains are the same (so they don't alias).
+    if (StoresInChain && ST->getChain() != StoresInChain)
+      continue;
+    // Ensure the store is properly aligned.
     Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
-    if (CallRetResNo == ResNo || !ST->isSimple() ||
-        ST->getAddressSpace() != 0 ||
-        ST->getAlign() <
-            getDataLayout().getABITypeAlign(StoreType->getScalarType()) ||
-        ST->getChain() != InChain)
+    if (ST->getAlign() <
+        getDataLayout().getABITypeAlign(StoreType->getScalarType()))
+      continue;
+    // Avoid:
+    // 1. Creating cyclic dependencies.
+    // 2. Expanding the node to a call within a call sequence.
+    if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
       continue;
     ResultStores[ResNo] = ST;
+    StoresInChain = ST->getChain();
   }
 
   TargetLowering::ArgListTy Args;
@@ -2563,6 +2624,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall
   Type *RetType = CallRetResNo.has_value()
                       ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
                       : Type::getVoidTy(Ctx);
+  SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
   SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
                                      TLI->getPointerTy(getDataLayout()));
   TargetLowering::CallLoweringInfo CLI(*this);