Skip to content

Commit a5fa778

Browse files
committed
[LegalizeTypes] Scalarize non-byte sized loads in WidenVecRes_LOAD and SplitVecRes_LOAD
Should fix PR42803 and PR44902. Differential Revision: https://reviews.llvm.org/D74590
1 parent 0ed4744 commit a5fa778

File tree

4 files changed

+85
-31
lines changed

4 files changed

+85
-31
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1505,6 +1505,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
15051505
EVT LoMemVT, HiMemVT;
15061506
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
15071507

1508+
if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) {
1509+
SDValue Value, NewChain;
1510+
std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
1511+
std::tie(Lo, Hi) = DAG.SplitVector(Value, dl);
1512+
ReplaceValueWith(SDValue(LD, 1), NewChain);
1513+
return;
1514+
}
1515+
15081516
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
15091517
LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
15101518

@@ -3667,6 +3675,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
36673675
LoadSDNode *LD = cast<LoadSDNode>(N);
36683676
ISD::LoadExtType ExtType = LD->getExtensionType();
36693677

3678+
// A vector must always be stored in memory as-is, i.e. without any padding
3679+
// between the elements, since various code depends on it, e.g. in the
3680+
// handling of a bitcast of a vector type to int, which may be done with a
3681+
// vector store followed by an integer load. A vector that does not have
3682+
// elements that are byte-sized must therefore be stored as an integer
3683+
// built out of the extracted vector elements.
3684+
if (!LD->getMemoryVT().isByteSized()) {
3685+
SDValue Value, NewChain;
3686+
std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
3687+
ReplaceValueWith(SDValue(LD, 0), Value);
3688+
ReplaceValueWith(SDValue(LD, 1), NewChain);
3689+
return SDValue();
3690+
}
3691+
36703692
SDValue Result;
36713693
SmallVector<SDValue, 16> LdChain; // Chain for the series of load
36723694
if (ExtType != ISD::NON_EXTLOAD)

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6584,12 +6584,48 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
65846584
SDValue Chain = LD->getChain();
65856585
SDValue BasePTR = LD->getBasePtr();
65866586
EVT SrcVT = LD->getMemoryVT();
6587+
EVT DstVT = LD->getValueType(0);
65876588
ISD::LoadExtType ExtType = LD->getExtensionType();
65886589

65896590
unsigned NumElem = SrcVT.getVectorNumElements();
65906591

65916592
EVT SrcEltVT = SrcVT.getScalarType();
6592-
EVT DstEltVT = LD->getValueType(0).getScalarType();
6593+
EVT DstEltVT = DstVT.getScalarType();
6594+
6595+
// A vector must always be stored in memory as-is, i.e. without any padding
6596+
// between the elements, since various code depends on it, e.g. in the
6597+
// handling of a bitcast of a vector type to int, which may be done with a
6598+
// vector store followed by an integer load. A vector that does not have
6599+
// elements that are byte-sized must therefore be stored as an integer
6600+
// built out of the extracted vector elements.
6601+
if (!SrcEltVT.isByteSized()) {
6602+
unsigned NumBits = SrcVT.getSizeInBits();
6603+
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
6604+
6605+
SDValue Load = DAG.getLoad(IntVT, SL, Chain, BasePTR, LD->getPointerInfo(),
6606+
LD->getAlignment(),
6607+
LD->getMemOperand()->getFlags(),
6608+
LD->getAAInfo());
6609+
6610+
SmallVector<SDValue, 8> Vals;
6611+
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6612+
unsigned ShiftIntoIdx =
6613+
(DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
6614+
SDValue ShiftAmount =
6615+
DAG.getConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), SL, IntVT);
6616+
SDValue ShiftedElt =
6617+
DAG.getNode(ISD::SRL, SL, IntVT, Load, ShiftAmount);
6618+
SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, ShiftedElt);
6619+
if (ExtType != ISD::NON_EXTLOAD) {
6620+
unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
6621+
Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
6622+
}
6623+
Vals.push_back(Scalar);
6624+
}
6625+
6626+
SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
6627+
return std::make_pair(Value, Load.getValue(1));
6628+
}
65936629

65946630
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
65956631
assert(SrcEltVT.isByteSized());
@@ -6611,7 +6647,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
66116647
}
66126648

66136649
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
6614-
SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
6650+
SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
66156651

66166652
return std::make_pair(Value, NewChain);
66176653
}

llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -121,18 +121,20 @@ define void @fun2(<8 x i32> %src, <8 x i31>* %p)
121121
define void @fun3(<3 x i31>* %src, <3 x i31>* %p)
122122
; CHECK-LABEL: fun3:
123123
; CHECK: # %bb.0:
124-
; CHECK-NEXT: llgf %r0, 3(%r2)
125-
; CHECK-NEXT: llgf %r1, 6(%r2)
126-
; CHECK-NEXT: llgf %r2, 0(%r2)
127-
; CHECK-NEXT: rosbg %r1, %r0, 0, 32, 31
128-
; CHECK-NEXT: sllg %r4, %r2, 62
129-
; CHECK-NEXT: rosbg %r4, %r0, 0, 32, 31
130-
; CHECK-NEXT: srlg %r0, %r4, 32
131-
; CHECK-NEXT: st %r1, 8(%r3)
132-
; CHECK-NEXT: sllg %r1, %r2, 30
133-
; CHECK-NEXT: lr %r1, %r0
134-
; CHECK-NEXT: nihh %r1, 8191
135-
; CHECK-NEXT: stg %r1, 0(%r3)
124+
; CHECK-NEXT: l %r0, 8(%r2)
125+
; CHECK-NEXT: lg %r1, 0(%r2)
126+
; CHECK-NEXT: sllg %r2, %r1, 32
127+
; CHECK-NEXT: lr %r2, %r0
128+
; CHECK-NEXT: srlg %r0, %r2, 62
129+
; CHECK-NEXT: st %r2, 8(%r3)
130+
; CHECK-NEXT: rosbg %r0, %r1, 33, 61, 34
131+
; CHECK-NEXT: sllg %r1, %r0, 62
132+
; CHECK-NEXT: rosbg %r1, %r2, 2, 32, 0
133+
; CHECK-NEXT: srlg %r1, %r1, 32
134+
; CHECK-NEXT: sllg %r0, %r0, 30
135+
; CHECK-NEXT: lr %r0, %r1
136+
; CHECK-NEXT: nihh %r0, 8191
137+
; CHECK-NEXT: stg %r0, 0(%r3)
136138
; CHECK-NEXT: br %r14
137139
{
138140
%tmp = load <3 x i31>, <3 x i31>* %src

llvm/test/CodeGen/X86/load-local-v3i1.ll

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -96,27 +96,21 @@ define void @local_load_v3i1(i32 addrspace(1)* %out, i32 addrspace(1)* %in, <3 x
9696
; CHECK-NEXT: pushq %rbx
9797
; CHECK-NEXT: pushq %rax
9898
; CHECK-NEXT: movq %rdi, %r14
99-
; CHECK-NEXT: movzbl (%rdx), %ebp
100-
; CHECK-NEXT: movl %ebp, %eax
101-
; CHECK-NEXT: shrl %eax
102-
; CHECK-NEXT: andl $1, %eax
103-
; CHECK-NEXT: movl %ebp, %ecx
104-
; CHECK-NEXT: andl $1, %ecx
105-
; CHECK-NEXT: movd %ecx, %xmm0
106-
; CHECK-NEXT: pinsrd $1, %eax, %xmm0
107-
; CHECK-NEXT: shrl $2, %ebp
108-
; CHECK-NEXT: andl $1, %ebp
109-
; CHECK-NEXT: pinsrd $2, %ebp, %xmm0
110-
; CHECK-NEXT: movd %xmm0, %ebx
111-
; CHECK-NEXT: pextrd $1, %xmm0, %r15d
99+
; CHECK-NEXT: movb (%rdx), %al
100+
; CHECK-NEXT: movl %eax, %ecx
101+
; CHECK-NEXT: shrb $2, %cl
102+
; CHECK-NEXT: movzbl %al, %r15d
103+
; CHECK-NEXT: shrb %al
104+
; CHECK-NEXT: movzbl %al, %ebx
105+
; CHECK-NEXT: movzbl %cl, %ebp
112106
; CHECK-NEXT: movq %rsi, %rdi
113-
; CHECK-NEXT: movl %ebx, %esi
114-
; CHECK-NEXT: movl %r15d, %edx
107+
; CHECK-NEXT: movl %r15d, %esi
108+
; CHECK-NEXT: movl %ebx, %edx
115109
; CHECK-NEXT: movl %ebp, %ecx
116110
; CHECK-NEXT: callq masked_load_v3
117111
; CHECK-NEXT: movq %r14, %rdi
118-
; CHECK-NEXT: movl %ebx, %esi
119-
; CHECK-NEXT: movl %r15d, %edx
112+
; CHECK-NEXT: movl %r15d, %esi
113+
; CHECK-NEXT: movl %ebx, %edx
120114
; CHECK-NEXT: movl %ebp, %ecx
121115
; CHECK-NEXT: callq masked_store4_v3
122116
; CHECK-NEXT: addq $8, %rsp

0 commit comments

Comments
 (0)