
Commit 181279f

[X86][GlobalISel] Add support for sret demotion
The change adds support for the cases when the return value is passed in memory rather than in registers.

Differential Revision: https://reviews.llvm.org/D134181
1 parent bfcd536 commit 181279f
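
For context: with GlobalISel, a return type such as <32 x float> does not fit in x86 return registers (absent wide vector extensions), so it must be demoted to a hidden sret pointer. A minimal IR example of the affected pattern, mirroring the test added by this commit (the function name here is made up for illustration):

; Minimal illustrative example (mirrors the new test below): this return
; value cannot be passed in registers, so it is returned via a hidden
; pointer (sret demotion).
define <32 x float> @big_ret() {
  ret <32 x float> zeroinitializer
}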

3 files changed: +94, -10 lines

llvm/lib/Target/X86/X86CallLowering.cpp

Lines changed: 33 additions & 10 deletions
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
@@ -129,15 +130,29 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
 
 } // end anonymous namespace
 
+bool X86CallLowering::canLowerReturn(
+    MachineFunction &MF, CallingConv::ID CallConv,
+    SmallVectorImpl<CallLowering::BaseArgInfo> &Outs, bool IsVarArg) const {
+  LLVMContext &Context = MF.getFunction().getContext();
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+  return checkReturn(CCInfo, Outs, RetCC_X86);
+}
+
 bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                   const Value *Val, ArrayRef<Register> VRegs,
                                   FunctionLoweringInfo &FLI) const {
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
+  MachineFunction &MF = MIRBuilder.getMF();
   auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  bool Is64Bit = STI.is64Bit();
 
-  if (!VRegs.empty()) {
-    MachineFunction &MF = MIRBuilder.getMF();
+  if (!FLI.CanLowerReturn) {
+    insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
+    MIRBuilder.buildCopy(Is64Bit ? X86::RAX : X86::EAX, FLI.DemoteRegister);
+  } else if (!VRegs.empty()) {
     const Function &F = MF.getFunction();
     MachineRegisterInfo &MRI = MF.getRegInfo();
     const DataLayout &DL = MF.getDataLayout();
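
The new canLowerReturn override asks the calling-convention checker whether the return fits in registers. When it does not, lowerFormalArguments (next hunk) records the hidden pointer in FLI.DemoteRegister, and the lowerReturn path above stores the value through that pointer and returns the pointer itself in RAX (EAX on 32-bit), as the SysV ABI requires. A rough sketch of the resulting gMIR for the 64-bit case; %ret and %sret are made-up names (the exact output appears in the test below):

; Illustrative gMIR for the demoted-return path on x86-64; %ret stands for
; the return value, %sret for FLI.DemoteRegister.
G_STORE %ret(<32 x s32>), %sret(p0) :: (store (<32 x s32>))
$rax = COPY %sret(p0)
RET 0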
@@ -238,18 +253,19 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                            const Function &F,
                                            ArrayRef<ArrayRef<Register>> VRegs,
                                            FunctionLoweringInfo &FLI) const {
-  if (F.arg_empty())
-    return true;
-
-  // TODO: handle variadic function
-  if (F.isVarArg())
-    return false;
-
   MachineFunction &MF = MIRBuilder.getMF();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   auto DL = MF.getDataLayout();
 
   SmallVector<ArgInfo, 8> SplitArgs;
+
+  if (!FLI.CanLowerReturn)
+    insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
+
+  // TODO: handle variadic function
+  if (F.isVarArg())
+    return false;
+
   unsigned Idx = 0;
   for (const auto &Arg : F.args()) {
     // TODO: handle not simple cases.
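
On the formal-argument side, when the return was demoted, insertSRetIncomingArgument prepends a hidden pointer argument to SplitArgs and records its virtual register in FLI.DemoteRegister for lowerReturn to use. Note that the old early-exit on F.arg_empty() is gone: a function with no IR arguments can still carry the hidden sret argument, hence the SplitArgs.empty() check added in the next hunk. Sketched for a 64-bit callee (register name illustrative; compare the X64 checks in the test):

; Illustrative: the demoted callee receives the sret pointer as an
; ordinary first argument ($rdi under the SysV 64-bit convention).
%sret:_(p0) = COPY $rdi    ; becomes FLI.DemoteRegister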
@@ -267,6 +283,9 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
     Idx++;
   }
 
+  if (SplitArgs.empty())
+    return true;
+
   MachineBasicBlock &MBB = MIRBuilder.getMBB();
   if (!MBB.empty())
     MIRBuilder.setInstr(*MBB.begin());
@@ -363,7 +382,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // symmetry with the arguments, the physical register must be an
   // implicit-define of the call instruction.
 
-  if (!Info.OrigRet.Ty->isVoidTy()) {
+  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
     if (Info.OrigRet.Regs.size() > 1)
       return false;
 
@@ -391,5 +410,9 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       .addImm(Assigner.getStackSize())
       .addImm(0 /* NumBytesForCalleeToPop */);
 
+  if (!Info.CanLowerReturn)
+    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
+                    Info.DemoteRegister, Info.DemoteStackIndex);
+
   return true;
 }
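
At call sites the mirror image happens: when the callee's return was demoted, the result is not copied out of physical return registers; instead, after the call sequence, insertSRetLoads reads the value back from the demote stack slot whose address was passed as the hidden argument. Sketched for 64-bit with made-up names (%slot, %res, @callee); the test's X64 checks show the real output:

; Illustrative call-site gMIR after sret demotion (x86-64): %slot is the
; demote stack slot passed in $rdi; the result is loaded back afterwards.
$rdi = COPY %slot(p0)
CALL64pcrel32 @callee, csr_64, implicit $rsp, implicit $ssp, implicit $rdi
%res:_(<32 x s32>) = G_LOAD %slot(p0) :: (load (<32 x s32>) from %stack.0)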

llvm/lib/Target/X86/X86CallLowering.h

Lines changed: 4 additions & 0 deletions
@@ -36,6 +36,10 @@ class X86CallLowering : public CallLowering {
 
   bool lowerCall(MachineIRBuilder &MIRBuilder,
                  CallLoweringInfo &Info) const override;
+
+  bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
+                      SmallVectorImpl<BaseArgInfo> &Outs,
+                      bool IsVarArg) const override;
 };
 
 } // end namespace llvm

llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll

Lines changed: 57 additions & 0 deletions
@@ -716,3 +716,60 @@ define void @test_variadic_call_2(ptr %addr_ptr, ptr %val_ptr) {
   call void (ptr, ...) @variadic_callee(ptr %addr, double %val)
   ret void
 }
+
+; Return value is in memory unless subtarget is AVX or higher.
+define <32 x float> @test_return_v32f32() {
+  ; X86-LABEL: name: test_return_v32f32
+  ; X86: bb.1 (%ir-block.0):
+  ; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
+  ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; X86-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; X86-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[LOAD]](p0) :: (store (<32 x s32>))
+  ; X86-NEXT: $eax = COPY [[LOAD]](p0)
+  ; X86-NEXT: RET 0
+  ; X64-LABEL: name: test_return_v32f32
+  ; X64: bb.1 (%ir-block.0):
+  ; X64-NEXT: liveins: $rdi
+  ; X64-NEXT: {{ $}}
+  ; X64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
+  ; X64-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; X64-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; X64-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[COPY]](p0) :: (store (<32 x s32>))
+  ; X64-NEXT: $rax = COPY [[COPY]](p0)
+  ; X64-NEXT: RET 0
+  ret <32 x float> zeroinitializer
+}
+
+define float @test_call_v32f32() {
+  ; X86-LABEL: name: test_call_v32f32
+  ; X86: bb.1 (%ir-block.0):
+  ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+  ; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; X86-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32)
+  ; X86-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+  ; X86-NEXT: CALLpcrel32 @test_return_v32f32, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (load (<32 x s32>) from %stack.0)
+  ; X86-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s32)
+  ; X86-NEXT: $fp0 = COPY [[EVEC]](s32)
+  ; X86-NEXT: RET 0, implicit $fp0
+  ; X64-LABEL: name: test_call_v32f32
+  ; X64: bb.1 (%ir-block.0):
+  ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; X64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT: $rdi = COPY [[FRAME_INDEX]](p0)
+  ; X64-NEXT: CALL64pcrel32 @test_return_v32f32, csr_64, implicit $rsp, implicit $ssp, implicit $rdi
+  ; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (load (<32 x s32>) from %stack.0)
+  ; X64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s64)
+  ; X64-NEXT: $xmm0 = COPY [[EVEC]](s32)
+  ; X64-NEXT: RET 0, implicit $xmm0
+  %vect = call <32 x float> @test_return_v32f32()
+  %elt = extractelement <32 x float> %vect, i32 7
+  ret float %elt
+}
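
The CHECK lines follow the update_mir_test_checks.py format. The file's RUN lines are not part of this diff; based on the existing test, they should look along these lines (triples and check prefixes assumed, not confirmed by this commit):

; RUN: llc -mtriple=i386-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=X86
; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=X64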
