
Commit 6b7d5a9

AMDGPU/GlobalISel: Start cleaning up calling convention lowering
There are various hacks working around limitations in handleAssignments, and the logical split between different parts isn't correct. Start separating the type legalization to satisfy going through the DAG infrastructure from the code required to split into register types. The type splitting should be moved to generic code.
1 parent ebfe4de commit 6b7d5a9
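A rough sketch of the two-stage split the commit message describes, using toy stand-ins rather than the LLVM API (ArgInfo, ComputeValueVTs, getNumRegistersForCallingConv): the first stage decomposes an IR value into its leaf value types, and the second breaks each value type into 32-bit register-sized parts.

// Toy model only; the real stages are splitToValueTypes and
// processSplitArgs in AMDGPUCallLowering.cpp below.
#include <cstdio>
#include <vector>

struct ToyType { const char *Name; unsigned Bits; };

// Stage 1: decompose an aggregate into its leaf value types,
// e.g. {i8, <3 x i32>} -> [i8, <3 x i32>].
std::vector<ToyType> splitToLeafTypes(const std::vector<ToyType> &Members) {
  return Members; // struct members are already the leaf types here
}

// Stage 2: break one value type into whole 32-bit register parts
// (AMDGPU's calling conventions pass values in 32-bit registers).
std::vector<ToyType> splitToRegisterParts(const ToyType &T) {
  unsigned NumParts = (T.Bits + 31) / 32; // round up to whole registers
  return std::vector<ToyType>(NumParts, ToyType{"s32", 32});
}

int main() {
  std::vector<ToyType> Struct = {{"i8", 8}, {"<3 x i32>", 96}};
  for (const ToyType &VT : splitToLeafTypes(Struct)) {
    std::printf("%s ->", VT.Name);
    for (const ToyType &Part : splitToRegisterParts(VT))
      std::printf(" %s", Part.Name);
    std::printf("\n");
  }
  // prints: i8 -> s32
  //         <3 x i32> -> s32 s32 s32
}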

File tree

4 files changed, +179 -103 lines

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 144 additions & 72 deletions
@@ -248,18 +248,18 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
   }
 
   void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
-                            uint64_t Size, MachinePointerInfo &MPO,
+                            uint64_t MemSize, MachinePointerInfo &MPO,
                             CCValAssign &VA) override {
     Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
                            ? extendRegister(Arg.Regs[0], VA)
                            : Arg.Regs[0];
 
-    // If we extended we might need to adjust the MMO's Size.
+    // If we extended the value type we might need to adjust the MMO's
+    // Size. This happens if ComputeValueVTs widened a small type value to a
+    // legal register type (e.g. s8->s16)
     const LLT RegTy = MRI.getType(ValVReg);
-    if (RegTy.getSizeInBytes() > Size)
-      Size = RegTy.getSizeInBytes();
-
-    assignValueToAddress(ValVReg, Addr, Size, MPO, VA);
+    MemSize = std::min(MemSize, (uint64_t)RegTy.getSizeInBytes());
+    assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
   }
 };
 }
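
The std::min above clamps the memory operand to the smaller of the argument's in-memory size and the (possibly widened) register size, rather than growing it as the old code did. A minimal standalone illustration of the clamp itself (the byte counts are hypothetical):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t MemSize = 4;      // hypothetical in-memory size of the slot
  uint64_t RegSizeBytes = 2; // e.g. an s8 value widened to an s16 register
  // The MMO must not claim more bytes than either quantity covers.
  MemSize = std::min(MemSize, RegSizeBytes);
  std::printf("MMO size: %llu bytes\n", (unsigned long long)MemSize);
}
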
@@ -282,57 +282,72 @@ static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
   }
 }
 
-void AMDGPUCallLowering::splitToValueTypes(
-    MachineIRBuilder &B,
-    const ArgInfo &OrigArg,
-    SmallVectorImpl<ArgInfo> &SplitArgs,
-    const DataLayout &DL, CallingConv::ID CallConv,
-    bool IsOutgoing,
-    SplitArgTy PerformArgSplit) const {
+// FIXME: This should move to generic code.
+void AMDGPUCallLowering::splitToValueTypes(MachineIRBuilder &B,
+                                           const ArgInfo &OrigArg,
+                                           SmallVectorImpl<ArgInfo> &SplitArgs,
+                                           const DataLayout &DL,
+                                           CallingConv::ID CallConv) const {
   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
 
-  if (OrigArg.Ty->isVoidTy())
-    return;
-
   SmallVector<EVT, 4> SplitVTs;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
 
   assert(OrigArg.Regs.size() == SplitVTs.size());
 
-  int SplitIdx = 0;
-  for (EVT VT : SplitVTs) {
-    Register Reg = OrigArg.Regs[SplitIdx];
-    Type *Ty = VT.getTypeForEVT(Ctx);
-    LLT LLTy = getLLTForType(*Ty, DL);
+  if (SplitVTs.size() == 0)
+    return;
 
-    if (IsOutgoing && VT.isScalarInteger()) {
-      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
-      if (OrigArg.Flags[0].isSExt()) {
-        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
-        ExtendOp = TargetOpcode::G_SEXT;
-      } else if (OrigArg.Flags[0].isZExt()) {
-        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
-        ExtendOp = TargetOpcode::G_ZEXT;
-      }
+  if (SplitVTs.size() == 1) {
+    // No splitting to do, but we want to replace the original type (e.g. [1 x
+    // double] -> double).
+    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
+                           OrigArg.Flags[0], OrigArg.IsFixed);
+    return;
+  }
 
-      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
-                                          extOpcodeToISDExtOpcode(ExtendOp));
-      if (ExtVT.getSizeInBits() != VT.getSizeInBits()) {
-        VT = ExtVT;
-        Ty = ExtVT.getTypeForEVT(Ctx);
-        LLTy = getLLTForType(*Ty, DL);
-        Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
-      }
-    }
+  // Create one ArgInfo for each virtual register in the original ArgInfo.
+  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
+  bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+      OrigArg.Ty, CallConv, false);
+  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
+                           OrigArg.IsFixed);
+    if (NeedsRegBlock)
+      SplitArgs.back().Flags[0].setInConsecutiveRegs();
+  }
+
+  SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
+}
+
+void AMDGPUCallLowering::processSplitArgs(
+    MachineIRBuilder &B, const ArgInfo &OrigArg,
+    const SmallVectorImpl<ArgInfo> &SplitArg,
+    SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL,
+    CallingConv::ID CallConv, bool IsOutgoing,
+    SplitArgTy PerformArgSplit) const {
+  LLVMContext &Ctx = OrigArg.Ty->getContext();
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+
+  // FIXME: This is mostly nasty pre-processing before handleAssignments. Most
+  // of this should be performed by handleAssignments.
+
+  int SplitIdx = 0;
+  for (const ArgInfo &SplitArg : SplitArg) {
+    Register Reg = OrigArg.Regs[SplitIdx];
+    EVT VT = EVT::getEVT(SplitArg.Ty);
+    LLT LLTy = getLLTForType(*SplitArg.Ty, DL);
 
     unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
     MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
 
     if (NumParts == 1) {
       // No splitting to do, but we want to replace the original type (e.g. [1 x
       // double] -> double).
-      SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);
+      SplitArgs.emplace_back(Reg, SplitArg.Ty, OrigArg.Flags, OrigArg.IsFixed);
 
       ++SplitIdx;
       continue;
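
One detail worth noting in the new splitToValueTypes: pieces of an argument that must occupy adjacent registers are tagged via the InConsecutiveRegs flags. A toy model of that bookkeeping (the names are stand-ins for LLVM's ArgFlagsTy setters):

#include <cstdio>
#include <vector>

struct ToyFlags {
  bool InConsecutiveRegs = false;
  bool InConsecutiveRegsLast = false;
};

int main() {
  const unsigned NumPieces = 3;    // hypothetical split count
  const bool NeedsRegBlock = true; // argument needs a register block

  std::vector<ToyFlags> SplitArgs(NumPieces);
  for (ToyFlags &F : SplitArgs)
    if (NeedsRegBlock)
      F.InConsecutiveRegs = true;
  // Mirrors the code above: the last piece is marked unconditionally.
  SplitArgs.back().InConsecutiveRegsLast = true;

  for (unsigned i = 0; i != NumPieces; ++i)
    std::printf("piece %u: block=%d last=%d\n", i,
                SplitArgs[i].InConsecutiveRegs,
                SplitArgs[i].InConsecutiveRegsLast);
}
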
@@ -425,22 +440,68 @@ bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
   const auto &F = MF.getFunction();
   const DataLayout &DL = MF.getDataLayout();
   MachineRegisterInfo *MRI = B.getMRI();
+  LLVMContext &Ctx = F.getContext();
 
   CallingConv::ID CC = F.getCallingConv();
   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
 
-  ArgInfo OrigRetInfo(VRegs, Val->getType());
-  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
-  SmallVector<ArgInfo, 4> SplitRetInfos;
+  SmallVector<EVT, 8> SplitEVTs;
+  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+  assert(VRegs.size() == SplitEVTs.size() &&
+         "For each split Type there should be exactly one VReg.");
+
+  // We pre-process the return value decomposed into EVTs.
+  SmallVector<ArgInfo, 8> PreSplitRetInfos;
 
-  splitToValueTypes(
-      B, OrigRetInfo, SplitRetInfos, DL, CC, true,
-      [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
-          int VTSplitIdx) {
-        unpackRegsToOrigType(B, Regs, SrcReg,
-                             SplitRetInfos[VTSplitIdx],
-                             LLTy, PartLLT);
-      });
+  // Further processing is applied to split the arguments from PreSplitRetInfos
+  // into 32-bit pieces in SplitRetInfos before passing off to
+  // handleAssignments.
+  SmallVector<ArgInfo, 8> SplitRetInfos;
+
+  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+    EVT VT = SplitEVTs[i];
+    Register Reg = VRegs[i];
+    ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx));
+    setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+
+    if (VT.isScalarInteger()) {
+      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+      if (RetInfo.Flags[0].isSExt()) {
+        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
+        ExtendOp = TargetOpcode::G_SEXT;
+      } else if (RetInfo.Flags[0].isZExt()) {
+        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
+        ExtendOp = TargetOpcode::G_ZEXT;
+      }
+
+      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
+                                          extOpcodeToISDExtOpcode(ExtendOp));
+      if (ExtVT != VT) {
+        RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
+        LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
+        Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
+      }
+    }
+
+    if (Reg != RetInfo.Regs[0]) {
+      RetInfo.Regs[0] = Reg;
+      // Reset the arg flags after modifying Reg.
+      setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+    }
+
+    splitToValueTypes(B, RetInfo, PreSplitRetInfos, DL, CC);
+
+    // FIXME: This splitting should mostly be done by handleAssignments
+    processSplitArgs(B, RetInfo,
+                     PreSplitRetInfos, SplitRetInfos, DL, CC, true,
+                     [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy,
+                         LLT PartLLT, int VTSplitIdx) {
+                       unpackRegsToOrigType(B, Regs, SrcReg,
+                                            PreSplitRetInfos[VTSplitIdx], LLTy,
+                                            PartLLT);
+                     });
+    PreSplitRetInfos.clear();
+  }
 
   CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
   AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
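
The extension choice in the loop above follows the return attributes: signext selects G_SEXT, zeroext selects G_ZEXT, and anything else falls back to G_ANYEXT. Reduced to its decision logic (a toy enum, not the real opcodes):

#include <cstdio>

enum class ExtendOp { AnyExt, SExt, ZExt };

// Mirrors the if/else chain in lowerReturnVal: the IR's signext/zeroext
// return attributes pick the extension used to widen a small scalar.
ExtendOp chooseExtend(bool IsSExt, bool IsZExt) {
  if (IsSExt)
    return ExtendOp::SExt;
  if (IsZExt)
    return ExtendOp::ZExt;
  return ExtendOp::AnyExt;
}

int main() {
  // e.g. "define signext i8 @f()": widen with a sign extension.
  ExtendOp Op = chooseExtend(/*IsSExt=*/true, /*IsZExt=*/false);
  std::printf("%s\n", Op == ExtendOp::SExt ? "G_SEXT" : "other");
}
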
@@ -814,7 +875,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     CCInfo.AllocateReg(ImplicitBufferPtrReg);
   }
 
-
+  SmallVector<ArgInfo, 8> SplitArg;
   SmallVector<ArgInfo, 32> SplitArgs;
   unsigned Idx = 0;
   unsigned PSInputNum = 0;
@@ -859,16 +920,18 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
     setArgFlags(OrigArg, OrigArgIdx, DL, F);
 
-    splitToValueTypes(
-        B, OrigArg, SplitArgs, DL, CC, false,
-        // FIXME: We should probably be passing multiple registers to
-        // handleAssignments to do this
-        [&](ArrayRef<Register> Regs, Register DstReg,
-            LLT LLTy, LLT PartLLT, int VTSplitIdx) {
-          assert(DstReg == VRegs[Idx][VTSplitIdx]);
-          packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
-                                  LLTy, PartLLT);
-        });
+    SplitArg.clear();
+    splitToValueTypes(B, OrigArg, SplitArg, DL, CC);
+
+    processSplitArgs(B, OrigArg, SplitArg, SplitArgs, DL, CC, false,
+                     // FIXME: We should probably be passing multiple registers
+                     // to handleAssignments to do this
+                     [&](ArrayRef<Register> Regs, Register DstReg, LLT LLTy,
+                         LLT PartLLT, int VTSplitIdx) {
+                       assert(DstReg == VRegs[Idx][VTSplitIdx]);
+                       packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
+                                               LLTy, PartLLT);
+                     });
 
     ++Idx;
   }
@@ -1159,17 +1222,21 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   }
 
   SmallVector<ArgInfo, 8> OutArgs;
-  SmallVector<ArgInfo, 4> SplitRetInfos;
 
+  SmallVector<ArgInfo, 8> SplitArg;
   for (auto &OrigArg : Info.OrigArgs) {
-    splitToValueTypes(
-        MIRBuilder, OrigArg, OutArgs, DL, Info.CallConv, true,
+    splitToValueTypes(MIRBuilder, OrigArg, SplitArg, DL, Info.CallConv);
+
+    processSplitArgs(
+        MIRBuilder, OrigArg, SplitArg, OutArgs, DL, Info.CallConv, true,
         // FIXME: We should probably be passing multiple registers to
         // handleAssignments to do this
         [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
            int VTSplitIdx) {
          unpackRegsToOrigType(MIRBuilder, Regs, SrcReg, OrigArg, LLTy, PartLLT);
        });
+
+    SplitArg.clear();
   }
 
   // If we can lower as a tail call, do that instead.
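
Both call-lowering loops now reuse a single SplitArg scratch vector, clearing it between arguments instead of allocating one per argument. The pattern in miniature:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> SplitArg;           // scratch buffer, reused per argument
  const int PieceCounts[] = {1, 2, 3}; // hypothetical splits per argument
  for (int N : PieceCounts) {
    for (int i = 0; i < N; ++i)
      SplitArg.push_back(i);           // stand-in for splitToValueTypes
    std::printf("lowered an arg in %zu piece(s)\n", SplitArg.size());
    SplitArg.clear();                  // reset before the next argument
  }
}
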
@@ -1269,14 +1336,19 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   SmallVector<ArgInfo, 8> InArgs;
   if (!Info.OrigRet.Ty->isVoidTy()) {
+    SmallVector<ArgInfo, 8> PreSplitRetInfos;
+
     splitToValueTypes(
-        MIRBuilder, Info.OrigRet, InArgs, DL, Info.CallConv, false,
-        [&](ArrayRef<Register> Regs, Register DstReg,
-            LLT LLTy, LLT PartLLT, int VTSplitIdx) {
-          assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
-          packSplitRegsToOrigType(MIRBuilder, Info.OrigRet.Regs[VTSplitIdx],
-                                  Regs, LLTy, PartLLT);
-        });
+        MIRBuilder, Info.OrigRet, PreSplitRetInfos/*InArgs*/, DL, Info.CallConv);
+
+    processSplitArgs(MIRBuilder, Info.OrigRet,
+                     PreSplitRetInfos, InArgs/*SplitRetInfos*/, DL, Info.CallConv, false,
+                     [&](ArrayRef<Register> Regs, Register DstReg,
+                         LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+                       assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
+                       packSplitRegsToOrigType(MIRBuilder, Info.OrigRet.Regs[VTSplitIdx],
+                                               Regs, LLTy, PartLLT);
+                     });
   }
 
   // Make sure the raw argument copies are inserted before the marshalling to

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h

Lines changed: 9 additions & 6 deletions
@@ -32,13 +32,16 @@ class AMDGPUCallLowering final : public CallLowering {
   /// A function of this type is used to perform value split action.
   using SplitArgTy = std::function<void(ArrayRef<Register>, Register, LLT, LLT, int)>;
 
-  void splitToValueTypes(MachineIRBuilder &B,
-                         const ArgInfo &OrigArgInfo,
+  void splitToValueTypes(MachineIRBuilder &B, const ArgInfo &OrigArgInfo,
                          SmallVectorImpl<ArgInfo> &SplitArgs,
-                         const DataLayout &DL,
-                         CallingConv::ID CallConv,
-                         bool IsOutgoing,
-                         SplitArgTy SplitArg) const;
+                         const DataLayout &DL, CallingConv::ID CallConv) const;
+
+  void processSplitArgs(MachineIRBuilder &B, const ArgInfo &OrigArgInfo,
+                        const SmallVectorImpl<ArgInfo> &SplitArg,
+                        SmallVectorImpl<ArgInfo> &SplitArgs,
+                        const DataLayout &DL, CallingConv::ID CallConv,
+                        bool IsOutgoing,
+                        SplitArgTy PerformArgSplit) const;
 
   bool lowerReturnVal(MachineIRBuilder &B, const Value *Val,
                       ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll

Lines changed: 2 additions & 2 deletions
@@ -76,9 +76,9 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
   ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
   ; CHECK: $vgpr1 = COPY [[LOAD2]](s32)
   ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -104,9 +104,9 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
   ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK: $sgpr4 = COPY [[ANYEXT]](s32)
   ; CHECK: $sgpr5 = COPY [[LOAD2]](s32)
   ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
