Skip to content

Commit 19b28a7

Browse files
committed
Enable vectorization of i8 values.
1 parent 661f90a commit 19b28a7

File tree

11 files changed

+475
-335
lines changed

11 files changed

+475
-335
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,6 +1907,10 @@ class TargetTransformInfo {
19071907
/// pad to. Default is no padding.
19081908
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
19091909

1910+
/// \return true if vectorizing 4 x i8s into an i32 is possible.
1911+
/// Currently only used by the SLP vectorizer.
1912+
bool canVectorizei8s() const;
1913+
19101914
/// @}
19111915

19121916
/// Collect kernel launch bounds for \p F into \p LB.
@@ -2363,6 +2367,7 @@ class TargetTransformInfo::Concept {
23632367
virtual void collectKernelLaunchBounds(
23642368
const Function &F,
23652369
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const = 0;
2370+
virtual bool canVectorizei8s() const = 0;
23662371
};
23672372

23682373
template <typename T>
@@ -3229,6 +3234,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
32293234
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override {
32303235
Impl.collectKernelLaunchBounds(F, LB);
32313236
}
3237+
3238+
bool canVectorizei8s() const override { return Impl.canVectorizei8s(); }
32323239
};
32333240

32343241
template <typename T>

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,6 +1063,8 @@ class TargetTransformInfoImplBase {
10631063

10641064
unsigned getMaxNumArgs() const { return UINT_MAX; }
10651065

1066+
bool canVectorizei8s() const { return false; }
1067+
10661068
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
10671069
return 0;
10681070
}

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1419,6 +1419,10 @@ unsigned TargetTransformInfo::getMaxNumArgs() const {
14191419
return TTIImpl->getMaxNumArgs();
14201420
}
14211421

1422+
bool TargetTransformInfo::canVectorizei8s() const {
1423+
return TTIImpl->canVectorizei8s();
1424+
}
1425+
14221426
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
14231427
return TTIImpl->shouldExpandReduction(II);
14241428
}

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -344,9 +344,10 @@ unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
344344
unsigned GCNTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
345345
if (Opcode == Instruction::Load || Opcode == Instruction::Store)
346346
return 32 * 4 / ElemWidth;
347-
return (ElemWidth == 16 && ST->has16BitInsts()) ? 2
348-
: (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
349-
: 1;
347+
return ElemWidth == 8 ? 4
348+
: (ElemWidth == 16 && ST->has16BitInsts()) ? 2
349+
: (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
350+
: 1;
350351
}
351352

352353
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@@ -1120,6 +1121,17 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
11201121
}
11211122
}
11221123

1124+
InstructionCost GCNTTIImpl::getScalarizationOverhead(
1125+
VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract,
1126+
TTI::TargetCostKind CostKind, ArrayRef<Value *> VL) {
1127+
unsigned NumVectorElts = cast<FixedVectorType>(InTy)->getNumElements();
1128+
if (NumVectorElts > 1 &&
1129+
InTy->getElementType() == IntegerType::getInt8Ty(InTy->getContext()))
1130+
return 0;
1131+
return BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert, Extract,
1132+
CostKind, VL);
1133+
}
1134+
11231135
InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11241136
VectorType *VT, ArrayRef<int> Mask,
11251137
TTI::TargetCostKind CostKind,
@@ -1134,6 +1146,11 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11341146
// Larger vector widths may require additional instructions, but are
11351147
// typically cheaper than scalarized versions.
11361148
unsigned NumVectorElts = cast<FixedVectorType>(VT)->getNumElements();
1149+
1150+
if (NumVectorElts > 1 &&
1151+
VT->getElementType() == IntegerType::getInt8Ty(VT->getContext()))
1152+
return 0;
1153+
11371154
if (ST->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
11381155
DL.getTypeSizeInBits(VT->getElementType()) == 16) {
11391156
bool HasVOP3P = ST->hasVOP3PInsts();
@@ -1423,3 +1440,5 @@ void GCNTTIImpl::collectKernelLaunchBounds(
14231440
LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
14241441
LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
14251442
}
1443+
1444+
bool GCNTTIImpl::canVectorizei8s() const { return true; }

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,12 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
240240

241241
InstructionCost getVectorSplitCost() { return 0; }
242242

243+
InstructionCost getScalarizationOverhead(VectorType *InTy,
244+
const APInt &DemandedElts,
245+
bool Insert, bool Extract,
246+
TTI::TargetCostKind CostKind,
247+
ArrayRef<Value *> VL = {});
248+
243249
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
244250
ArrayRef<int> Mask,
245251
TTI::TargetCostKind CostKind, int Index,
@@ -282,6 +288,9 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
282288
void collectKernelLaunchBounds(
283289
const Function &F,
284290
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
291+
292+
/// \return true if we can pack 4 i8s into an i32.
293+
bool canVectorizei8s() const;
285294
};
286295

287296
} // end namespace llvm

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12971,9 +12971,16 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1297112971
LI0->getPointerAddressSpace(), CostKind);
1297212972

1297312973
} else {
12974-
VecLdCost = TTI->getMemoryOpCost(
12975-
Instruction::Load, VecTy, LI0->getAlign(),
12976-
LI0->getPointerAddressSpace(), CostKind, TTI::OperandValueInfo());
12974+
if (VecTy->getElementType() ==
12975+
IntegerType::getInt8Ty(VecTy->getContext()) &&
12976+
TTI->canVectorizei8s()) {
12977+
VecLdCost = 1;
12978+
} else {
12979+
VecLdCost =
12980+
TTI->getMemoryOpCost(Instruction::Load, VecTy, LI0->getAlign(),
12981+
LI0->getPointerAddressSpace(), CostKind,
12982+
TTI::OperandValueInfo());
12983+
}
1297712984
}
1297812985
break;
1297912986
case TreeEntry::StridedVectorize: {
@@ -20927,7 +20934,11 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
2092720934
// provided vectorization factor (i.e. the scalar type is used for vector
2092820935
// code during codegen).
2092920936
auto *VecTy = getWidenedType(ScalarTy, VF);
20930-
if (TTI->getNumberOfParts(VecTy) == VF)
20937+
unsigned NumParts = TTI->getNumberOfParts(VecTy);
20938+
if (TTI->canVectorizei8s() &&
20939+
VecTy->getElementType() == IntegerType::getInt8Ty(VecTy->getContext()))
20940+
NumParts = 1;
20941+
if (NumParts == VF)
2093120942
continue;
2093220943
for (unsigned I = NextInst; I < MaxInst; ++I) {
2093320944
unsigned ActualVF = std::min(MaxInst - I, VF);

llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ define i32 @umax(i32 %arg) {
4747
; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
4848
; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
4949
; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
50-
; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
51-
; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
52-
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
53-
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
54-
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
55-
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
56-
; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
50+
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
51+
; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
52+
; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
53+
; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
54+
; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
55+
; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
56+
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
5757
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
5858
; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
5959
; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -72,13 +72,13 @@ define i32 @umax(i32 %arg) {
7272
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
7373
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
7474
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
75-
; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
76-
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
77-
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
78-
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
79-
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
80-
; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
81-
; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
75+
; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
76+
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
77+
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
78+
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
79+
; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
80+
; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
81+
; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
8282
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
8383
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
8484
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -153,13 +153,13 @@ define i32 @umin(i32 %arg) {
153153
; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
154154
; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
155155
; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
156-
; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
157-
; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
158-
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
159-
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
160-
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
161-
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
162-
; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
156+
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
157+
; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
158+
; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
159+
; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
160+
; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
161+
; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
162+
; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
163163
; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
164164
; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
165165
; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -178,13 +178,13 @@ define i32 @umin(i32 %arg) {
178178
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
179179
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
180180
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
181-
; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
182-
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
183-
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
184-
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
185-
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
186-
; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
187-
; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
181+
; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
182+
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
183+
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
184+
; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
185+
; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
186+
; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
187+
; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
188188
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
189189
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
190190
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)

0 commit comments

Comments
 (0)