Commit 95a0666
[AMDGPU] ISel for @llvm.amdgcn.cs.chain intrinsic
The @llvm.amdgcn.cs.chain intrinsic is essentially a call. The call parameters are bundled up into 2 intrinsic arguments, one for those that should go in the SGPRs (the 3rd intrinsic argument), and one for those that should go in the VGPRs (the 4th intrinsic argument). Both will often be some kind of aggregate.

Both instruction selection frameworks have some internal representation for intrinsics (G_INTRINSIC[_WITH_SIDE_EFFECTS] for GlobalISel, ISD::INTRINSIC_[VOID|WITH_CHAIN] for DAGISel), but we can't use those here because aggregates are dissolved very early during ISel and we'd lose the inreg information. Therefore, this patch short-circuits both the IRTranslator and SelectionDAGBuilder to lower this intrinsic as a call from the very start. It tries to use the existing infrastructure as much as possible, by calling into the code for lowering tail calls.

This has already gone through a few rounds of review in Phab:

Differential Revision: https://reviews.llvm.org/D153761
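For reference, a minimal sketch of what a call to the intrinsic can look like at the IR level. Everything here is illustrative rather than taken from this commit: the function names and aggregate types are made up, the intrinsic's overloaded type suffix is elided, and a wave32 target is assumed (hence the i32 EXEC mask).

; Illustrative sketch only.
declare amdgpu_cs_chain void @callee({ i32, i32 } inreg, { i32 })

define amdgpu_cs_chain void @caller({ i32, i32 } inreg %sgpr_args, { i32 } %vgpr_args) {
  call void (ptr, i32, { i32, i32 }, { i32 }, i32, ...)
      @llvm.amdgcn.cs.chain(ptr @callee,                   ; function to jump to
                            i32 -1,                        ; value to put in EXEC
                            { i32, i32 } inreg %sgpr_args, ; bundled SGPR params
                            { i32 } %vgpr_args,            ; bundled VGPR params
                            i32 0)                         ; flags, must be 0 for now
  unreachable ; the call never returns
}

The inreg/non-inreg split on the 3rd and 4th arguments is exactly what the lowering below asserts on.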
1 parent: ed97932

11 files changed (+2537, -16 lines)

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 3 additions & 0 deletions
@@ -62,6 +62,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PatternMatch.h"

@@ -2390,6 +2391,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
     return CLI->lowerCall(MIRBuilder, Info);
   }
+  case Intrinsic::amdgcn_cs_chain:
+    return translateCallBase(CI, MIRBuilder);
   case Intrinsic::fptrunc_round: {
     uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 54 additions & 0 deletions
@@ -76,6 +76,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"

@@ -7424,6 +7425,59 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     setValue(&I, Val);
     return;
   }
+  case Intrinsic::amdgcn_cs_chain: {
+    assert(I.arg_size() == 5 && "Additional args not supported yet");
+    assert(cast<ConstantInt>(I.getOperand(4))->isZero() &&
+           "Non-zero flags not supported yet");
+
+    // At this point we don't care if it's amdgpu_cs_chain or
+    // amdgpu_cs_chain_preserve.
+    CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain;
+
+    Type *RetTy = I.getType();
+    assert(RetTy->isVoidTy() && "Should not return");
+
+    SDValue Callee = getValue(I.getOperand(0));
+
+    // We only have 2 actual args: one for the SGPRs and one for the VGPRs.
+    TargetLowering::ArgListTy Args;
+    Args.reserve(2);
+
+    for (unsigned Idx : {2, 3}) {
+      TargetLowering::ArgListEntry Arg;
+      Arg.Node = getValue(I.getOperand(Idx));
+      Arg.Ty = I.getOperand(Idx)->getType();
+      Arg.setAttributes(&I, Idx);
+      Args.push_back(Arg);
+    }
+
+    assert(Args[0].IsInReg && "SGPR args should be marked inreg");
+    assert(!Args[1].IsInReg && "VGPR args should not be marked inreg");
+
+    // We're also going to pass the EXEC mask as the last argument.
+    TargetLowering::ArgListEntry Arg;
+    Arg.Node = getValue(I.getOperand(1));
+    Arg.Ty = I.getOperand(1)->getType();
+    Arg.IsInReg = true;
+    Args.push_back(Arg);
+
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(getCurSDLoc())
+        .setChain(getRoot())
+        .setCallee(CC, RetTy, Callee, std::move(Args))
+        .setNoReturn(true)
+        .setTailCall(true)
+        .setConvergent(I.isConvergent());
+    CLI.CB = &I;
+    std::pair<SDValue, SDValue> Result =
+        lowerInvokable(CLI, /*EHPadBB*/ nullptr);
+    (void)Result;
+    assert(!Result.first.getNode() && !Result.second.getNode() &&
+           "Should've lowered as tail call");
+
+    HasTailCall = true;
+    return;
+  }
   case Intrinsic::ptrmask: {
     SDValue Ptr = getValue(I.getOperand(0));
     SDValue Mask = getValue(I.getOperand(1));

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 87 additions & 7 deletions
@@ -958,8 +958,10 @@ getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
 
 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                               bool IsTailCall, CallingConv::ID CC) {
-  assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
-                                        "because the address can be divergent");
+  // For calls to amdgpu_cs_chain functions, the address is known to be uniform.
+  assert((AMDGPU::isChainCC(CC) || !IsIndirect || !IsTailCall) &&
+         "Indirect calls can't be tail calls, "
+         "because the address can be divergent");
   if (!IsTailCall)
     return AMDGPU::G_SI_CALL;

@@ -1150,14 +1152,20 @@ bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
 void AMDGPUCallLowering::handleImplicitCallArguments(
     MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
     const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo,
+    CallingConv::ID CalleeCC,
     ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
   if (!ST.enableFlatScratch()) {
     // Insert copies for the SRD. In the HSA case, this should be an identity
     // copy.
     auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
                                                FuncInfo.getScratchRSrcReg());
-    MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
-    CallInst.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit);
+
+    auto CalleeRSrcReg = AMDGPU::isChainCC(CalleeCC)
+                             ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
+                             : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+
+    MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);
+    CallInst.addReg(CalleeRSrcReg, RegState::Implicit);
   }
 
   for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {

@@ -1253,7 +1261,8 @@ bool AMDGPUCallLowering::lowerTailCall(
   // after the ordinary user argument registers.
   SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
 
-  if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
+  if (Info.CallConv != CallingConv::AMDGPU_Gfx &&
+      !AMDGPU::isChainCC(Info.CallConv)) {
     // With a fixed ABI, allocate fixed registers before user arguments.
     if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
       return false;

@@ -1269,7 +1278,8 @@ bool AMDGPUCallLowering::lowerTailCall(
   if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
     return false;
 
-  handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, ImplicitArgRegs);
+  handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, CalleeCC,
+                              ImplicitArgRegs);
 
   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
   // sequence start and end here.

@@ -1283,6 +1293,23 @@ bool AMDGPUCallLowering::lowerTailCall(
     MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN).addImm(NumBytes).addImm(0);
   }
 
+  // If this is a chain call, we need to set EXEC right before the call.
+  if (AMDGPU::isChainCC(Info.CallConv)) {
+    ArgInfo ExecArg = Info.OrigArgs[1];
+    assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC");
+
+    if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize()))
+      return false;
+
+    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+    MCRegister Exec = TRI->getExec();
+    auto SetExec =
+        MIRBuilder.buildInstr(MovOpc).addDef(Exec).addReg(ExecArg.Regs[0]);
+    SetExec->getOperand(1).setReg(constrainOperandRegClass(
+        MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *SetExec,
+        SetExec->getDesc(), SetExec->getOperand(1), 1));
+  }
+
   // Now we can add the actual call instruction to the correct basic block.
   MIRBuilder.insertInstr(MIB);

@@ -1303,8 +1330,60 @@ bool AMDGPUCallLowering::lowerTailCall(
   return true;
 }
 
+/// Lower a call to the @llvm.amdgcn.cs.chain intrinsic.
+bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,
+                                        CallLoweringInfo &Info) const {
+  ArgInfo Callee = Info.OrigArgs[0];
+  ArgInfo SGPRArgs = Info.OrigArgs[2];
+  ArgInfo VGPRArgs = Info.OrigArgs[3];
+  ArgInfo Flags = Info.OrigArgs[4];
+
+  assert(cast<ConstantInt>(Flags.OrigValue)->isZero() &&
+         "Non-zero flags aren't supported yet.");
+  assert(Info.OrigArgs.size() == 5 && "Additional args aren't supported yet.");
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  const Function &F = MF.getFunction();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  // The function to jump to is actually the first argument, so we'll change the
+  // Callee and other info to match that before using our existing helper.
+  const Value *CalleeV = Callee.OrigValue->stripPointerCasts();
+  if (const Function *F = dyn_cast<Function>(CalleeV)) {
+    Info.Callee = MachineOperand::CreateGA(F, 0);
+    Info.CallConv = F->getCallingConv();
+  } else {
+    assert(Callee.Regs.size() == 1 && "Too many regs for the callee");
+    Info.Callee = MachineOperand::CreateReg(Callee.Regs[0], false);
+    Info.CallConv = CallingConv::AMDGPU_CS_Chain; // amdgpu_cs_chain_preserve
+                                                  // behaves the same here.
+  }
+
+  // The function that we're calling cannot be vararg (only the intrinsic is).
+  Info.IsVarArg = false;
+
+  assert(std::all_of(SGPRArgs.Flags.begin(), SGPRArgs.Flags.end(),
+                     [](ISD::ArgFlagsTy F) { return F.isInReg(); }) &&
+         "SGPR arguments should be marked inreg");
+  assert(std::none_of(VGPRArgs.Flags.begin(), VGPRArgs.Flags.end(),
+                      [](ISD::ArgFlagsTy F) { return F.isInReg(); }) &&
+         "VGPR arguments should not be marked inreg");
+
+  SmallVector<ArgInfo, 8> OutArgs;
+  splitToValueTypes(SGPRArgs, OutArgs, DL, Info.CallConv);
+  splitToValueTypes(VGPRArgs, OutArgs, DL, Info.CallConv);
+
+  Info.IsMustTailCall = true;
+  return lowerTailCall(MIRBuilder, Info, OutArgs);
+}
+
 bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
+  if (Function *F = Info.CB->getCalledFunction())
+    if (F->isIntrinsic())
+      return F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
+             lowerChainCall(MIRBuilder, Info);
+
   if (Info.IsVarArg) {
     LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
     return false;

@@ -1395,7 +1474,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
-  handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs);
+  handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, Info.CallConv,
+                              ImplicitArgRegs);
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getStackSize();
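One consequence of the getCallOpcode change above is that the callee doesn't have to be a known function: lowerChainCall also accepts a function pointer and emits a register-indirect tail call, which is legal here because the address is known to be uniform. A hedged IR-level sketch of that case (function names and types are illustrative, wave32 assumed, intrinsic suffix elided):

; Illustrative sketch: chaining through a runtime pointer rather than a
; known function; lowerChainCall then builds a register callee operand.
define amdgpu_cs_chain void @jump(ptr inreg %callee, i32 inreg %sgpr, i32 %vgpr) {
  call void (ptr, i32, i32, i32, i32, ...)
      @llvm.amdgcn.cs.chain(ptr %callee, i32 -1, i32 inreg %sgpr, i32 %vgpr, i32 0)
  unreachable
}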

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h

Lines changed: 3 additions & 0 deletions
@@ -75,10 +75,13 @@ class AMDGPUCallLowering final : public CallLowering {
   void handleImplicitCallArguments(
       MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
       const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI,
+      CallingConv::ID CalleeCC,
       ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const;
 
   bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
                      SmallVectorImpl<ArgInfo> &OutArgs) const;
+  bool lowerChainCall(MachineIRBuilder &MIRBuilder,
+                      CallLoweringInfo &Info) const;
   bool lowerCall(MachineIRBuilder &MIRBuilder,
                  CallLoweringInfo &Info) const override;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 51 additions & 7 deletions
@@ -3261,6 +3261,9 @@ bool SITargetLowering::isEligibleForTailCallOptimization(
     const SmallVectorImpl<ISD::OutputArg> &Outs,
     const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+  if (AMDGPU::isChainCC(CalleeCC))
+    return true;
+
   if (!mayTailCallThisCC(CalleeCC))
     return false;

@@ -3345,15 +3348,43 @@ bool SITargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
 // The wave scratch offset register is used as the global base pointer.
 SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
+  CallingConv::ID CallConv = CLI.CallConv;
+  bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
+
   SelectionDAG &DAG = CLI.DAG;
+
+  TargetLowering::ArgListEntry RequestedExec;
+  if (IsChainCallConv) {
+    // The last argument should be the value that we need to put in EXEC.
+    // Pop it out of CLI.Outs and CLI.OutVals before we do any processing so we
+    // don't treat it like the rest of the arguments.
+    RequestedExec = CLI.Args.back();
+    assert(RequestedExec.Node && "No node for EXEC");
+
+    if (!RequestedExec.Ty->isIntegerTy(Subtarget->getWavefrontSize()))
+      return lowerUnhandledCall(CLI, InVals, "Invalid value for EXEC");
+
+    assert(CLI.Outs.back().OrigArgIndex == 2 && "Unexpected last arg");
+    CLI.Outs.pop_back();
+    CLI.OutVals.pop_back();
+
+    if (RequestedExec.Ty->isIntegerTy(64)) {
+      assert(CLI.Outs.back().OrigArgIndex == 2 && "Exec wasn't split up");
+      CLI.Outs.pop_back();
+      CLI.OutVals.pop_back();
+    }
+
+    assert(CLI.Outs.back().OrigArgIndex != 2 &&
+           "Haven't popped all the pieces of the EXEC mask");
+  }
+
   const SDLoc &DL = CLI.DL;
   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
   SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
   SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
   SDValue Chain = CLI.Chain;
   SDValue Callee = CLI.Callee;
   bool &IsTailCall = CLI.IsTailCall;
-  CallingConv::ID CallConv = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
   bool IsSibCall = false;
   bool IsThisReturn = false;

@@ -3384,9 +3415,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
   if (IsTailCall) {
     IsTailCall = isEligibleForTailCallOptimization(
         Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
-    if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
+    if (!IsTailCall &&
+        ((CLI.CB && CLI.CB->isMustTailCall()) || IsChainCallConv)) {
       report_fatal_error("failed to perform tail call elimination on a call "
-                         "site marked musttail");
+                         "site marked musttail or to llvm.amdgcn.cs.chain");
     }
 
     bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;

@@ -3409,7 +3441,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
   CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
 
-  if (CallConv != CallingConv::AMDGPU_Gfx) {
+  if (CallConv != CallingConv::AMDGPU_Gfx && !AMDGPU::isChainCC(CallConv)) {
     // With a fixed ABI, allocate fixed registers before user arguments.
     passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
   }

@@ -3435,16 +3467,20 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
-  if (!IsSibCall) {
+  if (!IsSibCall)
     Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
 
+  if (!IsSibCall || IsChainCallConv) {
     if (!Subtarget->enableFlatScratch()) {
       SmallVector<SDValue, 4> CopyFromChains;
 
       // In the HSA case, this should be an identity copy.
       SDValue ScratchRSrcReg
           = DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
-      RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
+      RegsToPass.emplace_back(IsChainCallConv
+                                  ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
+                                  : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
+                              ScratchRSrcReg);
       CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
       Chain = DAG.getTokenFactor(DL, CopyFromChains);
     }

@@ -3570,6 +3606,15 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
     InGlue = Chain.getValue(1);
   }
 
+  auto *TRI = static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+
+  if (IsChainCallConv) {
+    // Set EXEC right before the call.
+    MCRegister ExecReg = TRI->getExec();
+    Chain = DAG.getCopyToReg(Chain, DL, ExecReg, RequestedExec.Node, InGlue);
+    InGlue = Chain.getValue(1);
+  }
+
   std::vector<SDValue> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);

@@ -3598,7 +3643,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // Add a register mask operand representing the call-preserved registers.
 
-  auto *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));
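The double pop_back in the IsChainCallConv block above handles wave64: there the EXEC argument is an i64, which argument lowering splits into two i32 pieces that both carry OrigArgIndex == 2. A hedged sketch of that case at the IR level (illustrative names and types, wave64 target assumed, intrinsic suffix elided):

; Illustrative sketch: on wave64 the EXEC mask is i64; SelectionDAG splits
; it into two i32 Outs entries, which LowerCall pops back off before
; assigning the real arguments.
declare amdgpu_cs_chain void @callee({ i32 } inreg, { i32 })

define amdgpu_cs_chain void @wave64_caller({ i32 } inreg %s, { i32 } %v) {
  call void (ptr, i64, { i32 }, { i32 }, i32, ...)
      @llvm.amdgcn.cs.chain(ptr @callee, i64 -1, { i32 } inreg %s, { i32 } %v, i32 0)
  unreachable
}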

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 5 additions & 0 deletions
@@ -421,6 +421,11 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
   case CallingConv::AMDGPU_Gfx:
     return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                                : CSR_AMDGPU_SI_Gfx_RegMask;
+  case CallingConv::AMDGPU_CS_Chain:
+  case CallingConv::AMDGPU_CS_ChainPreserve:
+    // Calls to these functions never return, so we can pretend everything is
+    // preserved.
+    return AMDGPU_AllVGPRs_RegMask;
   default:
     return nullptr;
   }
