Skip to content

Commit 08b2d0a

Browse files
committed
[X86] Disable copy elision in LowerMemArgument for scalarized vectors when the loc VT is a different size than the original element.
For example, a v4f16 argument is scalarized to 4 i32 values, so the values are spread out in memory instead of being packed tightly as they are in the original vector. Fixes PR47000.
1 parent 13796d1 commit 08b2d0a

File tree

2 files changed

+18
-10
lines changed

2 files changed

+18
-10
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3203,13 +3203,23 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
32033203
return DAG.getFrameIndex(FI, PtrVT);
32043204
}
32053205

3206+
EVT ArgVT = Ins[i].ArgVT;
3207+
3208+
// If this is a vector that has been split into multiple parts, and the
3209+
// scalar size of the parts doesn't match the vector element size, then we can't
3210+
// elide the copy. The parts will have padding between them instead of being
3211+
// packed like a vector.
3212+
bool ScalarizedAndExtendedVector =
3213+
ArgVT.isVector() && !VA.getLocVT().isVector() &&
3214+
VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3215+
32063216
// This is an argument in memory. We might be able to perform copy elision.
32073217
// If the argument is passed directly in memory without any extension, then we
32083218
// can perform copy elision. Large vector types, for example, may be passed
32093219
// indirectly by pointer.
32103220
if (Flags.isCopyElisionCandidate() &&
3211-
VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
3212-
EVT ArgVT = Ins[i].ArgVT;
3221+
VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3222+
!ScalarizedAndExtendedVector) {
32133223
SDValue PartAddr;
32143224
if (Ins[i].PartOffset == 0) {
32153225
// If this is a one-part value or the first part of a multi-part value,

llvm/test/CodeGen/X86/pr47000.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,15 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
1616
; CHECK-NEXT: movl %eax, %ecx
1717
; CHECK-NEXT: movw 176(%esp), %dx
1818
; CHECK-NEXT: movw 172(%esp), %si
19-
; CHECK-NEXT: movw 164(%esp), %di
20-
; CHECK-NEXT: movw 166(%esp), %bx
19+
; CHECK-NEXT: movw 168(%esp), %di
20+
; CHECK-NEXT: movw 164(%esp), %bx
2121
; CHECK-NEXT: movw 160(%esp), %bp
2222
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2323
; CHECK-NEXT: movw 156(%esp), %ax
2424
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
25-
; CHECK-NEXT: movw 148(%esp), %ax
26-
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
27-
; CHECK-NEXT: movw 150(%esp), %ax
25+
; CHECK-NEXT: movw 152(%esp), %ax
2826
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
29-
; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
27+
; CHECK-NEXT: movw 148(%esp), %ax
3028
; CHECK-NEXT: movw %ax, 112(%esp)
3129
; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
3230
; CHECK-NEXT: movw %ax, 114(%esp)
@@ -35,8 +33,8 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
3533
; CHECK-NEXT: movw %bp, 118(%esp)
3634
; CHECK-NEXT: movw %dx, 110(%esp)
3735
; CHECK-NEXT: movw %si, 108(%esp)
38-
; CHECK-NEXT: movw %bx, 106(%esp)
39-
; CHECK-NEXT: movw %di, 104(%esp)
36+
; CHECK-NEXT: movw %di, 106(%esp)
37+
; CHECK-NEXT: movw %bx, 104(%esp)
4038
; CHECK-NEXT: movzwl 118(%esp), %edx
4139
; CHECK-NEXT: movzwl 116(%esp), %esi
4240
; CHECK-NEXT: movzwl 114(%esp), %edi

0 commit comments

Comments
 (0)