Skip to content

Commit 27bd8f9

Browse files
committed
Recommit "[SLP] Fix lookahead operand reordering for splat loads." attempt 2, fixed assertion crash.
Original review: https://reviews.llvm.org/D121354
This reverts commit f7d7d2a.
1 parent 07675b0 commit 27bd8f9

23 files changed

+237
-96
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,10 @@ class TargetTransformInfo {
658658
/// Return true if the target supports nontemporal load.
659659
bool isLegalNTLoad(Type *DataType, Align Alignment) const;
660660

661+
/// \returns true if the target supports broadcasting a load to a vector of
662+
/// type <NumElements x ElementTy>.
663+
bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const;
664+
661665
/// Return true if the target supports masked scatter.
662666
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
663667
/// Return true if the target supports masked gather.
@@ -1044,11 +1048,14 @@ class TargetTransformInfo {
10441048
/// The exact mask may be passed as Mask, or else the array will be empty.
10451049
/// The index and subtype parameters are used by the subvector insertion and
10461050
/// extraction shuffle kinds to show the insert/extract point and the type of
1047-
/// the subvector being inserted/extracted.
1051+
/// the subvector being inserted/extracted. The operands of the shuffle can be
1052+
/// passed through \p Args, which helps improve the cost estimation in some
1053+
/// cases, like in broadcast loads.
10481054
/// NOTE: For subvector extractions Tp represents the source type.
10491055
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
10501056
ArrayRef<int> Mask = None, int Index = 0,
1051-
VectorType *SubTp = nullptr) const;
1057+
VectorType *SubTp = nullptr,
1058+
ArrayRef<Value *> Args = None) const;
10521059

10531060
/// Represents a hint about the context in which a cast is used.
10541061
///
@@ -1549,6 +1556,8 @@ class TargetTransformInfo::Concept {
15491556
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
15501557
virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
15511558
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1559+
virtual bool isLegalBroadcastLoad(Type *ElementTy,
1560+
unsigned NumElements) const = 0;
15521561
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
15531562
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
15541563
virtual bool forceScalarizeMaskedGather(VectorType *DataType,
@@ -1659,7 +1668,8 @@ class TargetTransformInfo::Concept {
16591668
ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
16601669
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
16611670
ArrayRef<int> Mask, int Index,
1662-
VectorType *SubTp) = 0;
1671+
VectorType *SubTp,
1672+
ArrayRef<Value *> Args) = 0;
16631673
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
16641674
Type *Src, CastContextHint CCH,
16651675
TTI::TargetCostKind CostKind,
@@ -1952,6 +1962,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
19521962
bool isLegalNTLoad(Type *DataType, Align Alignment) override {
19531963
return Impl.isLegalNTLoad(DataType, Alignment);
19541964
}
1965+
bool isLegalBroadcastLoad(Type *ElementTy,
1966+
unsigned NumElements) const override {
1967+
return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
1968+
}
19551969
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
19561970
return Impl.isLegalMaskedScatter(DataType, Alignment);
19571971
}
@@ -2179,8 +2193,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
21792193
}
21802194
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
21812195
ArrayRef<int> Mask, int Index,
2182-
VectorType *SubTp) override {
2183-
return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
2196+
VectorType *SubTp,
2197+
ArrayRef<Value *> Args) override {
2198+
return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
21842199
}
21852200
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
21862201
CastContextHint CCH,

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,10 @@ class TargetTransformInfoImplBase {
256256
return Alignment >= DataSize && isPowerOf2_32(DataSize);
257257
}
258258

259+
bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const {
260+
return false;
261+
}
262+
259263
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
260264
return false;
261265
}
@@ -488,7 +492,8 @@ class TargetTransformInfoImplBase {
488492

489493
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
490494
ArrayRef<int> Mask, int Index,
491-
VectorType *SubTp) const {
495+
VectorType *SubTp,
496+
ArrayRef<Value *> Args = None) const {
492497
return 1;
493498
}
494499

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
871871

872872
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
873873
ArrayRef<int> Mask, int Index,
874-
VectorType *SubTp) {
874+
VectorType *SubTp,
875+
ArrayRef<Value *> Args = None) {
875876

876877
switch (improveShuffleKindFromMask(Kind, Mask)) {
877878
case TTI::SK_Broadcast:

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,11 @@ bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
396396
return TTIImpl->isLegalNTLoad(DataType, Alignment);
397397
}
398398

399+
bool TargetTransformInfo::isLegalBroadcastLoad(Type *ElementTy,
400+
unsigned NumElements) const {
401+
return TTIImpl->isLegalBroadcastLoad(ElementTy, NumElements);
402+
}
403+
399404
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
400405
Align Alignment) const {
401406
return TTIImpl->isLegalMaskedGather(DataType, Alignment);
@@ -740,12 +745,11 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost(
740745
return Cost;
741746
}
742747

743-
InstructionCost TargetTransformInfo::getShuffleCost(ShuffleKind Kind,
744-
VectorType *Ty,
745-
ArrayRef<int> Mask,
746-
int Index,
747-
VectorType *SubTp) const {
748-
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp);
748+
InstructionCost TargetTransformInfo::getShuffleCost(
749+
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, int Index,
750+
VectorType *SubTp, ArrayRef<Value *> Args) const {
751+
InstructionCost Cost =
752+
TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp, Args);
749753
assert(Cost >= 0 && "TTI should not produce negative costs!");
750754
return Cost;
751755
}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2604,7 +2604,8 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
26042604
InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
26052605
VectorType *Tp,
26062606
ArrayRef<int> Mask, int Index,
2607-
VectorType *SubTp) {
2607+
VectorType *SubTp,
2608+
ArrayRef<Value *> Args) {
26082609
Kind = improveShuffleKindFromMask(Kind, Mask);
26092610
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
26102611
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
330330

331331
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
332332
ArrayRef<int> Mask, int Index,
333-
VectorType *SubTp);
333+
VectorType *SubTp,
334+
ArrayRef<Value *> Args = None);
334335
/// @}
335336
};
336337

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1042,7 +1042,8 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
10421042

10431043
InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
10441044
VectorType *VT, ArrayRef<int> Mask,
1045-
int Index, VectorType *SubTp) {
1045+
int Index, VectorType *SubTp,
1046+
ArrayRef<Value *> Args) {
10461047
Kind = improveShuffleKindFromMask(Kind, Mask);
10471048
if (ST->hasVOP3PInsts()) {
10481049
if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
201201

202202
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
203203
ArrayRef<int> Mask, int Index,
204-
VectorType *SubTp);
204+
VectorType *SubTp,
205+
ArrayRef<Value *> Args = None);
205206

206207
bool areInlineCompatible(const Function *Caller,
207208
const Function *Callee) const;

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1202,7 +1202,8 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
12021202

12031203
InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
12041204
VectorType *Tp, ArrayRef<int> Mask,
1205-
int Index, VectorType *SubTp) {
1205+
int Index, VectorType *SubTp,
1206+
ArrayRef<Value *> Args) {
12061207
Kind = improveShuffleKindFromMask(Kind, Mask);
12071208
if (ST->hasNEON()) {
12081209
if (Kind == TTI::SK_Broadcast) {

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
213213

214214
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
215215
ArrayRef<int> Mask, int Index,
216-
VectorType *SubTp);
216+
VectorType *SubTp,
217+
ArrayRef<Value *> Args = None);
217218

218219
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
219220
TTI::ReductionFlags Flags) const;

llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
223223

224224
InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
225225
ArrayRef<int> Mask, int Index,
226-
Type *SubTp) {
226+
Type *SubTp,
227+
ArrayRef<Value *> Args) {
227228
return 1;
228229
}
229230

llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
125125
Align Alignment, unsigned AddressSpace,
126126
TTI::TargetCostKind CostKind);
127127
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
128-
ArrayRef<int> Mask, int Index, Type *SubTp);
128+
ArrayRef<int> Mask, int Index, Type *SubTp,
129+
ArrayRef<Value *> Args = None);
129130
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
130131
const Value *Ptr, bool VariableMask,
131132
Align Alignment,

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1015,7 +1015,8 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost(
10151015

10161016
InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
10171017
ArrayRef<int> Mask, int Index,
1018-
Type *SubTp) {
1018+
Type *SubTp,
1019+
ArrayRef<Value *> Args) {
10191020

10201021
InstructionCost CostFactor =
10211022
vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
111111
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
112112
const Instruction *CxtI = nullptr);
113113
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
114-
ArrayRef<int> Mask, int Index, Type *SubTp);
114+
ArrayRef<int> Mask, int Index, Type *SubTp,
115+
ArrayRef<Value *> Args = None);
115116
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
116117
TTI::CastContextHint CCH,
117118
TTI::TargetCostKind CostKind,

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
175175

176176
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
177177
VectorType *Tp, ArrayRef<int> Mask,
178-
int Index, VectorType *SubTp) {
178+
int Index, VectorType *SubTp,
179+
ArrayRef<Value *> Args) {
179180
if (Kind == TTI::SK_Splice && isa<ScalableVectorType>(Tp))
180181
return getSpliceCost(Tp, Index);
181182
return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
8080
InstructionCost getSpliceCost(VectorType *Tp, int Index);
8181
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
8282
ArrayRef<int> Mask, int Index,
83-
VectorType *SubTp);
83+
VectorType *SubTp,
84+
ArrayRef<Value *> Args = None);
8485

8586
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
8687
const Value *Ptr, bool VariableMask,

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,8 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
559559
InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
560560
VectorType *Tp,
561561
ArrayRef<int> Mask, int Index,
562-
VectorType *SubTp) {
562+
VectorType *SubTp,
563+
ArrayRef<Value *> Args) {
563564
Kind = improveShuffleKindFromMask(Kind, Mask);
564565
if (ST->hasVector()) {
565566
unsigned NumVectors = getNumVectorRegs(Tp);

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
9292
const Instruction *CxtI = nullptr);
9393
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
9494
ArrayRef<int> Mask, int Index,
95-
VectorType *SubTp);
95+
VectorType *SubTp,
96+
ArrayRef<Value *> Args = None);
9697
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
9798
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
9899
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,7 +1085,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
10851085
InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
10861086
VectorType *BaseTp,
10871087
ArrayRef<int> Mask, int Index,
1088-
VectorType *SubTp) {
1088+
VectorType *SubTp,
1089+
ArrayRef<Value *> Args) {
10891090
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
10901091
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
10911092
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
@@ -1545,9 +1546,27 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
15451546
{ TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, // blend+permute
15461547
};
15471548

1548-
if (ST->hasSSE2())
1549+
static const CostTblEntry SSE3BroadcastLoadTbl[] = {
1550+
{TTI::SK_Broadcast, MVT::v2f64, 0}, // broadcast handled by movddup
1551+
};
1552+
1553+
if (ST->hasSSE2()) {
1554+
bool IsLoad = !Args.empty() && llvm::all_of(Args, [](const Value *V) {
1555+
return isa<LoadInst>(V);
1556+
});
1557+
if (ST->hasSSE3() && IsLoad)
1558+
if (const auto *Entry =
1559+
CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
1560+
assert(isLegalBroadcastLoad(
1561+
BaseTp->getElementType(),
1562+
cast<FixedVectorType>(BaseTp)->getNumElements()) &&
1563+
"Table entry missing from isLegalBroadcastLoad()");
1564+
return LT.first * Entry->Cost;
1565+
}
1566+
15491567
if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
15501568
return LT.first * Entry->Cost;
1569+
}
15511570

15521571
static const CostTblEntry SSE1ShuffleTbl[] = {
15531572
{ TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
@@ -5118,6 +5137,13 @@ bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {
51185137
return true;
51195138
}
51205139

5140+
bool X86TTIImpl::isLegalBroadcastLoad(Type *ElementTy,
5141+
unsigned NumElements) const {
5142+
// With SSE3, a broadcast load of <2 x double> is handled by movddup.
5143+
return ST->hasSSE3() && NumElements == 2 &&
5144+
ElementTy == Type::getDoubleTy(ElementTy->getContext());
5145+
}
5146+
51215147
bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
51225148
if (!isa<VectorType>(DataTy))
51235149
return false;

llvm/lib/Target/X86/X86TargetTransformInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
131131
const Instruction *CxtI = nullptr);
132132
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
133133
ArrayRef<int> Mask, int Index,
134-
VectorType *SubTp);
134+
VectorType *SubTp,
135+
ArrayRef<Value *> Args = None);
135136
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
136137
TTI::CastContextHint CCH,
137138
TTI::TargetCostKind CostKind,
@@ -226,6 +227,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
226227
bool isLegalMaskedStore(Type *DataType, Align Alignment);
227228
bool isLegalNTLoad(Type *DataType, Align Alignment);
228229
bool isLegalNTStore(Type *DataType, Align Alignment);
230+
bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const;
229231
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment);
230232
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
231233
return forceScalarizeMaskedGather(VTy, Alignment);

0 commit comments

Comments
 (0)