-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][GlobalISel] Support StructRet arguments #96629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
We follow SelectionDAG and FastISel manner: set a register during formal arguments lowering. And use this register to insert a copy of StructRet argument to RAX register during return lowering. Fix a minor difference betwen GlobalISel and SelectionDAG when RAX register wasn't used and copy instruction may be deleted.
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-globalisel Author: Evgenii Kudriashov (e-kud) ChangesWe follow SelectionDAG and FastISel manner: set a register during formal arguments lowering and use this register to insert a copy of StructRet argument to RAX register during return lowering. Also add RAX register to RET instruction to fix a difference between GlobalISel and SelectionDAG, Full diff: https://github.com/llvm/llvm-project/pull/96629.diff 4 Files Affected:
diff --git a/llvm/lib/Target/X86/GISel/X86CallLowering.cpp b/llvm/lib/Target/X86/GISel/X86CallLowering.cpp
index 48830769fdf6c..4975582e89e31 100644
--- a/llvm/lib/Target/X86/GISel/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/GISel/X86CallLowering.cpp
@@ -16,6 +16,7 @@
#include "X86CallingConv.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
@@ -147,12 +148,17 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
"Return value without a vreg");
MachineFunction &MF = MIRBuilder.getMF();
auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- bool Is64Bit = STI.is64Bit();
+ auto FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ const auto &STI = MF.getSubtarget<X86Subtarget>();
+ Register RetReg = STI.is64Bit() ? X86::RAX : X86::EAX;
if (!FLI.CanLowerReturn) {
insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
- MIRBuilder.buildCopy(Is64Bit ? X86::RAX : X86::EAX, FLI.DemoteRegister);
+ MIRBuilder.buildCopy(RetReg, FLI.DemoteRegister);
+ MIB.addReg(RetReg);
+ } else if (Register Reg = FuncInfo->getSRetReturnReg()) {
+ MIRBuilder.buildCopy(RetReg, Reg);
+ MIB.addReg(RetReg);
} else if (!VRegs.empty()) {
const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -258,6 +264,7 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
MachineFunction &MF = MIRBuilder.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto DL = MF.getDataLayout();
+ auto FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<ArgInfo, 8> SplitArgs;
@@ -273,12 +280,17 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
// TODO: handle not simple cases.
if (Arg.hasAttribute(Attribute::ByVal) ||
Arg.hasAttribute(Attribute::InReg) ||
- Arg.hasAttribute(Attribute::StructRet) ||
Arg.hasAttribute(Attribute::SwiftSelf) ||
Arg.hasAttribute(Attribute::SwiftError) ||
Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
return false;
+ if (Arg.hasAttribute(Attribute::StructRet)) {
+ assert(VRegs[Idx].size() == 1 &&
+ "Unexpected amount of registers for sret argument.");
+ FuncInfo->setSRetReturnReg(VRegs[Idx][0]);
+ }
+
ArgInfo OrigArg(VRegs[Idx], Arg.getType(), Idx);
setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
index 55e73dc5d29ec..a797c235c46f4 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -5,6 +5,7 @@
@a1_8bit = external global i8
@a7_8bit = external global i8
@a8_8bit = external global i8
+%struct.all = type { i8, i16, i32, i8, i16, i32, i64, float, double }
define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) {
; X86-LABEL: name: test_i8_args_8
@@ -745,7 +746,7 @@ define <32 x float> @test_return_v32f32() {
; X86-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; X86-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[LOAD]](p0) :: (store (<32 x s32>))
; X86-NEXT: $eax = COPY [[LOAD]](p0)
- ; X86-NEXT: RET 0
+ ; X86-NEXT: RET 0, $eax
;
; X64-LABEL: name: test_return_v32f32
; X64: bb.1 (%ir-block.0):
@@ -756,7 +757,7 @@ define <32 x float> @test_return_v32f32() {
; X64-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; X64-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[COPY]](p0) :: (store (<32 x s32>))
; X64-NEXT: $rax = COPY [[COPY]](p0)
- ; X64-NEXT: RET 0
+ ; X64-NEXT: RET 0, $rax
ret <32 x float> zeroinitializer
}
@@ -793,3 +794,30 @@ define float @test_call_v32f32() {
%elt = extractelement <32 x float> %vect, i32 7
ret float %elt
}
+
+define void @test_sret(ptr sret(%struct.all) align 8 %result) #0 {
+ ; X86-LABEL: name: test_sret
+ ; X86: bb.1.entry:
+ ; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; X86-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
+ ; X86-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 104
+ ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[LOAD]](p0)
+ ; X86-NEXT: G_STORE [[C]](s8), [[COPY]](p0) :: (store (s8) into %ir.c, align 8)
+ ; X86-NEXT: $eax = COPY [[LOAD]](p0)
+ ; X86-NEXT: RET 0, $eax
+ ;
+ ; X64-LABEL: name: test_sret
+ ; X64: bb.1.entry:
+ ; X64-NEXT: liveins: $rdi
+ ; X64-NEXT: {{ $}}
+ ; X64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
+ ; X64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 104
+ ; X64-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
+ ; X64-NEXT: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8) into %ir.c, align 8)
+ ; X64-NEXT: $rax = COPY [[COPY]](p0)
+ ; X64-NEXT: RET 0, $rax
+entry:
+ %c = getelementptr inbounds %struct.all, ptr %result, i32 0, i32 0
+ store i8 104, ptr %c, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/isel-buildvector-sse.ll b/llvm/test/CodeGen/X86/isel-buildvector-sse.ll
index 5b96d57cf019b..7f580aad78764 100644
--- a/llvm/test/CodeGen/X86/isel-buildvector-sse.ll
+++ b/llvm/test/CodeGen/X86/isel-buildvector-sse.ll
@@ -22,22 +22,23 @@ define <8 x i32> @test_vector_v8i32() {
;
; SSE-X64-GISEL-LABEL: test_vector_v8i32:
; SSE-X64-GISEL: # %bb.0:
-; SSE-X64-GISEL-NEXT: movl $128100944, %eax # imm = 0x7A2AA50
-; SSE-X64-GISEL-NEXT: movl $-632258670, %ecx # imm = 0xDA507F92
-; SSE-X64-GISEL-NEXT: movl $-408980432, %edx # imm = 0xE79F7430
-; SSE-X64-GISEL-NEXT: movl $708630551, %esi # imm = 0x2A3CD817
+; SSE-X64-GISEL-NEXT: movq %rdi, %rax
+; SSE-X64-GISEL-NEXT: movl $128100944, %ecx # imm = 0x7A2AA50
+; SSE-X64-GISEL-NEXT: movl $-632258670, %edx # imm = 0xDA507F92
+; SSE-X64-GISEL-NEXT: movl $-408980432, %esi # imm = 0xE79F7430
+; SSE-X64-GISEL-NEXT: movl $708630551, %edi # imm = 0x2A3CD817
; SSE-X64-GISEL-NEXT: movl $-871899055, %r8d # imm = 0xCC07E051
; SSE-X64-GISEL-NEXT: movl $-633489957, %r9d # imm = 0xDA3DB5DB
; SSE-X64-GISEL-NEXT: movl $591019567, %r10d # imm = 0x233A3E2F
; SSE-X64-GISEL-NEXT: movl $708632899, %r11d # imm = 0x2A3CE143
-; SSE-X64-GISEL-NEXT: movl %eax, (%rdi)
-; SSE-X64-GISEL-NEXT: movl %ecx, 4(%rdi)
-; SSE-X64-GISEL-NEXT: movl %edx, 8(%rdi)
-; SSE-X64-GISEL-NEXT: movl %esi, 12(%rdi)
-; SSE-X64-GISEL-NEXT: movl %r8d, 16(%rdi)
-; SSE-X64-GISEL-NEXT: movl %r9d, 20(%rdi)
-; SSE-X64-GISEL-NEXT: movl %r10d, 24(%rdi)
-; SSE-X64-GISEL-NEXT: movl %r11d, 28(%rdi)
+; SSE-X64-GISEL-NEXT: movl %ecx, (%rax)
+; SSE-X64-GISEL-NEXT: movl %edx, 4(%rax)
+; SSE-X64-GISEL-NEXT: movl %esi, 8(%rax)
+; SSE-X64-GISEL-NEXT: movl %edi, 12(%rax)
+; SSE-X64-GISEL-NEXT: movl %r8d, 16(%rax)
+; SSE-X64-GISEL-NEXT: movl %r9d, 20(%rax)
+; SSE-X64-GISEL-NEXT: movl %r10d, 24(%rax)
+; SSE-X64-GISEL-NEXT: movl %r11d, 28(%rax)
; SSE-X64-GISEL-NEXT: retq
;
; SSE-X86-LABEL: test_vector_v8i32:
@@ -88,6 +89,7 @@ define <4 x i32> @test_vector_v4i32() {
;
; SSE-X64-GISEL-LABEL: test_vector_v4i32:
; SSE-X64-GISEL: # %bb.0:
+; SSE-X64-GISEL-NEXT: movq %rdi, %rax
; SSE-X64-GISEL-NEXT: movaps {{.*#+}} xmm0 = [128100944,3662708626,3885986864,708630551]
; SSE-X64-GISEL-NEXT: movaps %xmm0, (%rdi)
; SSE-X64-GISEL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/isel-buildvector-sse2.ll b/llvm/test/CodeGen/X86/isel-buildvector-sse2.ll
index 88e0ede0d4b6f..da089dda6d03d 100644
--- a/llvm/test/CodeGen/X86/isel-buildvector-sse2.ll
+++ b/llvm/test/CodeGen/X86/isel-buildvector-sse2.ll
@@ -19,20 +19,21 @@ define <7 x i8> @test_vector_v7i8() {
;
; SSE2-GISEL-LABEL: test_vector_v7i8:
; SSE2-GISEL: # %bb.0:
-; SSE2-GISEL-NEXT: movb $4, %al
-; SSE2-GISEL-NEXT: movb $8, %cl
-; SSE2-GISEL-NEXT: movb $15, %dl
-; SSE2-GISEL-NEXT: movb $16, %sil
+; SSE2-GISEL-NEXT: movq %rdi, %rax
+; SSE2-GISEL-NEXT: movb $4, %cl
+; SSE2-GISEL-NEXT: movb $8, %dl
+; SSE2-GISEL-NEXT: movb $15, %sil
+; SSE2-GISEL-NEXT: movb $16, %dil
; SSE2-GISEL-NEXT: movb $23, %r8b
; SSE2-GISEL-NEXT: movb $42, %r9b
; SSE2-GISEL-NEXT: movb $63, %r10b
-; SSE2-GISEL-NEXT: movb %al, (%rdi)
-; SSE2-GISEL-NEXT: movb %cl, 1(%rdi)
-; SSE2-GISEL-NEXT: movb %dl, 2(%rdi)
-; SSE2-GISEL-NEXT: movb %sil, 3(%rdi)
-; SSE2-GISEL-NEXT: movb %r8b, 4(%rdi)
-; SSE2-GISEL-NEXT: movb %r9b, 5(%rdi)
-; SSE2-GISEL-NEXT: movb %r10b, 6(%rdi)
+; SSE2-GISEL-NEXT: movb %cl, (%rax)
+; SSE2-GISEL-NEXT: movb %dl, 1(%rax)
+; SSE2-GISEL-NEXT: movb %sil, 2(%rax)
+; SSE2-GISEL-NEXT: movb %dil, 3(%rax)
+; SSE2-GISEL-NEXT: movb %r8b, 4(%rax)
+; SSE2-GISEL-NEXT: movb %r9b, 5(%rax)
+; SSE2-GISEL-NEXT: movb %r10b, 6(%rax)
; SSE2-GISEL-NEXT: retq
ret <7 x i8> <i8 4, i8 8, i8 15, i8 16, i8 23, i8 42, i8 63>
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this cover the implicit sret case? Can you add a test that hits that condition?
Is this really X86 specific or does it belong into CallLowering.cpp? |
@arsenm, the implicit case was supported earlier. There is already a test for it with
@qcolombet hmm, interesting. This is ABI dependent and according to SelectionDAG comments
Also each of these targets implements So to generalize this approach:
Looks like keeping it in the target code is simpler. |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/1090 Here is the relevant piece of the build log for the reference:
|
We follow SelectionDAG and FastISel manner: set a register during formal arguments lowering and use this register to insert a copy of StructRet argument to RAX register during return lowering. Also add RAX register to RET instruction to fix a difference between GlobalISel and SelectionDAG, when the copy instruction could be deleted.
We follow SelectionDAG and FastISel manner: set a register during formal arguments lowering and use this register to insert a copy of StructRet argument to RAX register during return lowering. Also add RAX register to RET instruction to fix a difference between GlobalISel and SelectionDAG, when the copy instruction could be deleted.
We follow SelectionDAG and FastISel manner: set a register during formal arguments lowering and use this register to insert a copy of StructRet argument to RAX register during return lowering.
Also add RAX register to RET instruction to fix a difference between GlobalISel and SelectionDAG,
when the copy instruction could be deleted.