Skip to content

Commit 3da1a96

Browse files
committed
[Statepoints] Support lowering gc relocations to virtual registers
(Disabled under a flag for the moment) This is part of a larger project wherein we are finally integrating lowering of gc live operands with the register allocator. Today, we force spill all operands in SelectionDAG. The code to do so is distinctly non-optimal. The approach this patch is working towards is to instead lower the relocations directly into the MI form, and let the register allocator pick which ones get spilled and which stack slots they get spilled to. In terms of performance, the latter part is actually more important as it avoids redundant shuffling of values between stack slots. This particular change adds ISEL support to produce the variadic def STATEPOINT form required by the above. In particular, the first N are lowered to variadic tied def/use pairs. So the new statepoint looks like this: reloc1,reloc2,... = STATEPOINT ..., base1, derived1<tied-def0>, base2, derived2<tied-def1>, ... N is limited by the maximal number of tied registers a machine instruction can have (15 at the moment). The current patch is restricted to handling relocations within a single basic block. Cross block relocations (e.g. invokes) are handled via the legacy mechanism. This restriction will be relaxed in future patches. Patch By: dantrushin Differential Revision: https://reviews.llvm.org/D81648
1 parent 9182dc7 commit 3da1a96

File tree

6 files changed

+1074
-24
lines changed

6 files changed

+1074
-24
lines changed

llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,28 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
8282
return N;
8383
}
8484

85+
/// Return the starting index of the GC operand list of a STATEPOINT machine
/// instruction, i.e. the index of the first operand that follows the deopt
/// arguments.
///
/// The number of deopt arguments is read as an immediate from the operand at
/// StatepointOpers(MI).getNumDeoptArgsIdx(); the deopt arguments that follow
/// it are then skipped one entry at a time.
///
/// \param MI the machine instruction to inspect; asserted to be a STATEPOINT.
/// \returns the operand index just past the last deopt argument.
86+
// FIXME: need a better place for this. Put it in StackMaps?
87+
static unsigned getStatepointGCArgStartIdx(MachineInstr *MI) {
88+
assert(MI->getOpcode() == TargetOpcode::STATEPOINT &&
89+
"STATEPOINT node expected");
90+
unsigned OperIdx = StatepointOpers(MI).getNumDeoptArgsIdx();
91+
unsigned NumDeopts = MI->getOperand(OperIdx).getImm();
92+
// At this point stack references have not been lowered yet, so each of
93+
// them takes a single operand.
94+
// Step past the deopt-count operand itself to reach the first deopt entry.
++OperIdx;
95+
while (NumDeopts--) {
96+
MachineOperand &MO = MI->getOperand(OperIdx);
97+
// An entry that starts with a StackMaps::ConstantOp immediate occupies two
// operands: the marker and a following immediate (checked by the assert).
if (MO.isImm() && MO.getImm() == StackMaps::ConstantOp) {
98+
++OperIdx;
99+
assert(MI->getOperand(OperIdx).isImm() &&
100+
"Unexpected statepoint operand");
101+
}
102+
// Every entry, with or without the marker, ends with one operand to skip.
++OperIdx;
103+
}
104+
return OperIdx;
105+
}
106+
85107
/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
86108
/// implicit physical register output.
87109
void InstrEmitter::
@@ -200,6 +222,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
200222
bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
201223
II.isVariadic() && II.variadicOpsAreDefs();
202224
unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs();
225+
if (Node->getMachineOpcode() == TargetOpcode::STATEPOINT)
226+
NumVRegs = NumResults;
203227
for (unsigned i = 0; i < NumVRegs; ++i) {
204228
// If the specific node value is only used by a CopyToReg and the dest reg
205229
// is a vreg in the same register class, use the CopyToReg'd destination
@@ -821,6 +845,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
821845
NumDefs = NumResults;
822846
}
823847
ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
848+
} else if (Opc == TargetOpcode::STATEPOINT) {
849+
NumDefs = NumResults;
824850
}
825851

826852
unsigned NumImpUses = 0;
@@ -970,6 +996,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
970996
if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
971997
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
972998

999+
// STATEPOINT is too 'dynamic' to have a meaningful machine description.
1000+
// We have to manually tie operands.
1001+
if (Opc == TargetOpcode::STATEPOINT && NumDefs > 0) {
1002+
assert(!HasPhysRegOuts && "STATEPOINT mishandled");
1003+
MachineInstr *MI = MIB;
1004+
unsigned Def = 0;
1005+
unsigned Use = getStatepointGCArgStartIdx(MI) + 1;
1006+
while (Def < NumDefs) {
1007+
if (MI->getOperand(Use).isReg())
1008+
MI->tieOperands(Def++, Use);
1009+
Use += 2;
1010+
}
1011+
}
1012+
9731013
// Run post-isel target hook to adjust this instruction if needed.
9741014
if (II.hasPostISelHook())
9751015
TLI->AdjustInstrPostInstrSelection(*MIB, Node);

llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
125125
PhysReg = Reg;
126126
} else if (Def->isMachineOpcode()) {
127127
const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
128-
if (ResNo >= II.getNumDefs() &&
129-
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
128+
if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg))
130129
PhysReg = Reg;
131130
}
132131

llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp

Lines changed: 111 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ cl::opt<bool> UseRegistersForDeoptValues(
6767
"use-registers-for-deopt-values", cl::Hidden, cl::init(false),
6868
cl::desc("Allow using registers for non pointer deopt args"));
6969

70+
cl::opt<unsigned> MaxRegistersForGCPointers(
71+
"max-registers-for-gc-values", cl::Hidden, cl::init(0),
72+
cl::desc("Max number of VRegs allowed to pass GC pointer meta args in"));
73+
7074
static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
7175
SelectionDAGBuilder &Builder, uint64_t Value) {
7276
SDLoc L = Builder.getCurSDLoc();
@@ -86,11 +90,13 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
8690
// FunctionLoweringInfo. Also need to ensure used bits get cleared.
8791
AllocatedStackSlots.clear();
8892
AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
93+
DerivedPtrMap.clear();
8994
}
9095

9196
void StatepointLoweringState::clear() {
9297
Locations.clear();
9398
AllocatedStackSlots.clear();
99+
DerivedPtrMap.clear();
94100
assert(PendingGCRelocateCalls.empty() &&
95101
"cleared before statepoint sequence completed");
96102
}
@@ -221,7 +227,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
221227
return None;
222228
}
223229

224-
225230
/// Return true if-and-only-if the given SDValue can be lowered as either a
226231
/// constant argument or a stack reference. The key point is that the value
227232
/// doesn't need to be spilled or tracked as a vreg use.
@@ -242,7 +247,6 @@ static bool willLowerDirectly(SDValue Incoming) {
242247
Incoming.isUndef());
243248
}
244249

245-
246250
/// Try to find existing copies of the incoming values in stack slots used for
247251
/// statepoint spilling. If we can find a spill slot for the incoming value,
248252
/// mark that slot as allocated, and reuse the same slot for this safepoint.
@@ -388,7 +392,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
388392
StoreMMO);
389393

390394
MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
391-
395+
392396
Builder.StatepointLowering.setLocation(Incoming, Loc);
393397
}
394398

@@ -485,7 +489,9 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
485489
/// will be set to the last value spilled (if any were).
486490
static void
487491
lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
488-
SmallVectorImpl<MachineMemOperand*> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI,
492+
SmallVectorImpl<MachineMemOperand *> &MemRefs,
493+
DenseMap<SDValue, int> &LowerAsVReg,
494+
SelectionDAGBuilder::StatepointLoweringInfo &SI,
489495
SelectionDAGBuilder &Builder) {
490496
// Lower the deopt and gc arguments for this statepoint. Layout will be:
491497
// deopt argument length, deopt arguments.., gc arguments...
@@ -531,6 +537,37 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
531537
const bool LiveInDeopt =
532538
SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
533539

540+
// Decide which derived pointers will go in VRegs.
541+
const unsigned MaxTiedRegs = 15; // Max number of tied regs MI can have.
542+
unsigned MaxVRegPtrs =
543+
std::min(MaxTiedRegs, MaxRegistersForGCPointers.getValue());
544+
// Use old spill scheme for cross-block relocates.
545+
if (SI.StatepointInstr) {
546+
const BasicBlock *BB = SI.StatepointInstr->getParent();
547+
bool NonLocalReloc =
548+
llvm::any_of(SI.GCRelocates, [BB](const GCRelocateInst *R) {
549+
return R->getParent() != BB;
550+
});
551+
if (NonLocalReloc)
552+
MaxVRegPtrs = 0;
553+
}
554+
555+
LLVM_DEBUG(dbgs() << "Desiding how to lower GC Pointers:\n");
556+
unsigned CurNumVRegs = 0;
557+
for (const Value *P : SI.Ptrs) {
558+
if (LowerAsVReg.size() == MaxVRegPtrs)
559+
break;
560+
SDValue PtrSD = Builder.getValue(P);
561+
if (willLowerDirectly(PtrSD) || P->getType()->isVectorTy()) {
562+
LLVM_DEBUG(dbgs() << "direct/spill "; PtrSD.dump(&Builder.DAG));
563+
continue;
564+
}
565+
LLVM_DEBUG(dbgs() << "vreg "; PtrSD.dump(&Builder.DAG));
566+
LowerAsVReg[PtrSD] = CurNumVRegs++;
567+
}
568+
LLVM_DEBUG(dbgs() << LowerAsVReg.size()
569+
<< " derived pointers will go in vregs\n");
570+
534571
auto isGCValue = [&](const Value *V) {
535572
auto *Ty = V->getType();
536573
if (!Ty->isPtrOrPtrVectorTy())
@@ -542,7 +579,9 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
542579
};
543580

544581
auto requireSpillSlot = [&](const Value *V) {
545-
return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
582+
if (isGCValue(V))
583+
return !LowerAsVReg.count(Builder.getValue(V));
584+
return !(LiveInDeopt || UseRegistersForDeoptValues);
546585
};
547586

548587
// Before we actually start lowering (and allocating spill slots for values),
@@ -554,9 +593,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
554593
if (requireSpillSlot(V))
555594
reservePreviousStackSlotForValue(V, Builder);
556595
}
596+
557597
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
558-
reservePreviousStackSlotForValue(SI.Bases[i], Builder);
559-
reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
598+
SDValue SDV = Builder.getValue(SI.Bases[i]);
599+
if (!LowerAsVReg.count(SDV))
600+
reservePreviousStackSlotForValue(SI.Bases[i], Builder);
601+
SDV = Builder.getValue(SI.Ptrs[i]);
602+
if (!LowerAsVReg.count(SDV))
603+
reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
560604
}
561605

562606
// First, prefix the list with the number of unique values to be
@@ -567,6 +611,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
567611

568612
// The vm state arguments are lowered in an opaque manner. We do not know
569613
// what type of values are contained within.
614+
LLVM_DEBUG(dbgs() << "Lowering deopt state\n");
570615
for (const Value *V : SI.DeoptState) {
571616
SDValue Incoming;
572617
// If this is a function argument at a static frame index, generate it as
@@ -578,6 +623,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
578623
}
579624
if (!Incoming.getNode())
580625
Incoming = Builder.getValue(V);
626+
LLVM_DEBUG(dbgs() << "Value " << *V
627+
<< " requireSpillSlot = " << requireSpillSlot(V) << "\n");
581628
lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs,
582629
Builder);
583630
}
@@ -588,14 +635,15 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
588635
// its (lowered) derived pointer, i.e.
589636
// (base[0], ptr[0], base[1], ptr[1], ...)
590637
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
591-
const Value *Base = SI.Bases[i];
592-
lowerIncomingStatepointValue(Builder.getValue(Base),
593-
/*RequireSpillSlot*/ true, Ops, MemRefs,
638+
bool RequireSpillSlot;
639+
SDValue Base = Builder.getValue(SI.Bases[i]);
640+
RequireSpillSlot = !LowerAsVReg.count(Base);
641+
lowerIncomingStatepointValue(Base, RequireSpillSlot, Ops, MemRefs,
594642
Builder);
595643

596-
const Value *Ptr = SI.Ptrs[i];
597-
lowerIncomingStatepointValue(Builder.getValue(Ptr),
598-
/*RequireSpillSlot*/ true, Ops, MemRefs,
644+
SDValue Derived = Builder.getValue(SI.Ptrs[i]);
645+
RequireSpillSlot = !LowerAsVReg.count(Derived);
646+
lowerIncomingStatepointValue(Derived, RequireSpillSlot, Ops, MemRefs,
599647
Builder);
600648
}
601649

@@ -630,7 +678,9 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
630678
SDValue SDV = Builder.getValue(V);
631679
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
632680

633-
if (Loc.getNode()) {
681+
if (LowerAsVReg.count(SDV)) {
682+
SpillMap[V] = None;
683+
} else if (Loc.getNode()) {
634684
SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
635685
} else {
636686
// Record value as visited, but not spilled. This is the case for allocas
@@ -665,6 +715,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
665715
assert(SI.Bases.size() == SI.Ptrs.size() &&
666716
SI.Ptrs.size() <= SI.GCRelocates.size());
667717

718+
LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
668719
#ifndef NDEBUG
669720
for (auto *Reloc : SI.GCRelocates)
670721
if (Reloc->getParent() == SI.StatepointInstr->getParent())
@@ -674,7 +725,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
674725
// Lower statepoint vmstate and gcstate arguments
675726
SmallVector<SDValue, 10> LoweredMetaArgs;
676727
SmallVector<MachineMemOperand*, 16> MemRefs;
677-
lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
728+
// Maps derived pointer SDValue to statepoint result of relocated pointer.
729+
DenseMap<SDValue, int> LowerAsVReg;
730+
lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, LowerAsVReg, SI, *this);
678731

679732
// Now that we've emitted the spills, we need to update the root so that the
680733
// call sequence is ordered correctly.
@@ -788,14 +841,35 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
788841

789842
// Compute return values. Provide a glue output since we consume one as
790843
// input. This allows someone else to chain off us as needed.
791-
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
844+
SmallVector<EVT, 8> NodeTys;
845+
for (auto &Ptr : SI.Ptrs) {
846+
SDValue SD = getValue(Ptr);
847+
if (LowerAsVReg.count(SD)) {
848+
NodeTys.push_back(SD.getValueType());
849+
}
850+
}
851+
LLVM_DEBUG(dbgs() << "Statepoint has " << NodeTys.size() << " results\n");
852+
assert(NodeTys.size() == LowerAsVReg.size() && "Inconsistent GC Ptr lowering");
853+
NodeTys.push_back(MVT::Other);
854+
NodeTys.push_back(MVT::Glue);
792855

856+
unsigned NumResults = NodeTys.size();
793857
MachineSDNode *StatepointMCNode =
794858
DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
795859
DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
796860

797861
SDNode *SinkNode = StatepointMCNode;
798862

863+
// Fill mapping from derived pointer to statepoint result denoting its
864+
// relocated value.
865+
auto &DPtrMap = StatepointLowering.DerivedPtrMap;
866+
for (const auto *Relocate : SI.GCRelocates) {
867+
Value *Derived = Relocate->getDerivedPtr();
868+
SDValue SD = getValue(Derived);
869+
if (LowerAsVReg.count(SD))
870+
DPtrMap[Derived] = SDValue(StatepointMCNode, LowerAsVReg[SD]);
871+
}
872+
799873
// Build the GC_TRANSITION_END node if necessary.
800874
//
801875
// See the comment above regarding GC_TRANSITION_START for the layout of
@@ -804,7 +878,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
804878
SmallVector<SDValue, 8> TEOps;
805879

806880
// Add chain
807-
TEOps.push_back(SDValue(StatepointMCNode, 0));
881+
TEOps.push_back(SDValue(StatepointMCNode, NumResults - 2));
808882

809883
// Add GC transition arguments
810884
for (const Value *V : SI.GCTransitionArgs) {
@@ -814,7 +888,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
814888
}
815889

816890
// Add glue
817-
TEOps.push_back(SDValue(StatepointMCNode, 1));
891+
TEOps.push_back(SDValue(StatepointMCNode, NumResults - 1));
818892

819893
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
820894

@@ -825,7 +899,12 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
825899
}
826900

827901
// Replace original call
828-
DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
902+
// Call: ch,glue = CALL ...
903+
// Statepoint: [gc relocates],ch,glue = STATEPOINT ...
904+
unsigned NumSinkValues = SinkNode->getNumValues();
905+
SDValue StatepointValues[2] = {SDValue(SinkNode, NumSinkValues - 2),
906+
SDValue(SinkNode, NumSinkValues - 1)};
907+
DAG.ReplaceAllUsesWith(CallNode, StatepointValues);
829908
// Remove original call node
830909
DAG.DeleteNode(CallNode);
831910

@@ -927,7 +1006,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
9271006
setValue(&I, ReturnValue);
9281007
return;
9291008
}
930-
1009+
9311010
// Result value will be used in a different basic block so we need to export
9321011
// it now. Default exporting mechanism will not work here because statepoint
9331012
// call has a different type than the actual call. It means that by default
@@ -1010,12 +1089,13 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
10101089
}
10111090

10121091
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
1092+
const BasicBlock *StatepointBB = Relocate.getStatepoint()->getParent();
10131093
#ifndef NDEBUG
10141094
// Consistency check
10151095
// We skip this check for relocates not in the same basic block as their
10161096
// statepoint. It would be too expensive to preserve validation info through
10171097
// different basic blocks.
1018-
if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
1098+
if (StatepointBB == Relocate.getParent())
10191099
StatepointLowering.relocCallVisited(Relocate);
10201100

10211101
auto *Ty = Relocate.getType()->getScalarType();
@@ -1033,6 +1113,16 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
10331113
return;
10341114
}
10351115

1116+
// Relocate is local to statepoint block and its pointer was assigned
1117+
// to VReg. Use corresponding statepoint result.
1118+
auto &DPtrMap = StatepointLowering.DerivedPtrMap;
1119+
auto It = DPtrMap.find(DerivedPtr);
1120+
if (It != DPtrMap.end()) {
1121+
setValue(&Relocate, It->second);
1122+
assert(Relocate.getParent() == StatepointBB && "unexpected DPtrMap entry");
1123+
return;
1124+
}
1125+
10361126
auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
10371127
auto SlotIt = SpillMap.find(DerivedPtr);
10381128
assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");

llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ class StatepointLoweringState {
103103
return AllocatedStackSlots.test(Offset);
104104
}
105105

106+
/// For each statepoint keep mapping from original derived pointer to
107+
/// the statepoint node result defining its new value.
108+
DenseMap<const Value *, SDValue> DerivedPtrMap;
109+
106110
private:
107111
/// Maps pre-relocation value (gc pointer directly incoming into statepoint)
108112
/// into its location (currently only stack slots)

0 commit comments

Comments
 (0)