Skip to content

Commit da1f620

Browse files
authored
Merge pull request #7993 from fhahn/aarch64-vec3-load-store-cost
Pick non-power-of-2 load/store cost improvements
2 parents b6ab783 + 790fe38 commit da1f620

File tree

2 files changed

+368
-8
lines changed

2 files changed

+368
-8
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 41 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2987,14 +2987,47 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
29872987
if (Ty->isPtrOrPtrVectorTy())
29882988
return LT.first;
29892989

2990-
// Check truncating stores and extending loads.
2991-
if (useNeonVector(Ty) &&
2992-
Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
2993-
// v4i8 types are lowered to scalar a load/store and sshll/xtn.
2994-
if (VT == MVT::v4i8)
2995-
return 2;
2996-
// Otherwise we need to scalarize.
2997-
return cast<FixedVectorType>(Ty)->getNumElements() * 2;
2990+
if (useNeonVector(Ty)) {
2991+
// Check truncating stores and extending loads.
2992+
if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
2993+
// v4i8 types are lowered to a scalar load/store and sshll/xtn.
2994+
if (VT == MVT::v4i8)
2995+
return 2;
2996+
// Otherwise we need to scalarize.
2997+
return cast<FixedVectorType>(Ty)->getNumElements() * 2;
2998+
}
2999+
EVT EltVT = VT.getVectorElementType();
3000+
unsigned EltSize = EltVT.getScalarSizeInBits();
3001+
if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
3002+
VT.getVectorNumElements() >= (128 / EltSize) || !Alignment ||
3003+
*Alignment != Align(1))
3004+
return LT.first;
3005+
// FIXME: v3i8 lowering currently is very inefficient, due to automatic
3006+
// widening to v4i8, which produces suboptimal results.
3007+
if (VT.getVectorNumElements() == 3 && EltVT == MVT::i8)
3008+
return LT.first;
3009+
3010+
// Check non-power-of-2 loads/stores for legal vector element types with
3011+
// NEON. Non-power-of-2 memory ops will get broken down to a set of
3012+
// operations on smaller power-of-2 ops, including ld1/st1.
3013+
LLVMContext &C = Ty->getContext();
3014+
InstructionCost Cost(0);
3015+
SmallVector<EVT> TypeWorklist;
3016+
TypeWorklist.push_back(VT);
3017+
while (!TypeWorklist.empty()) {
3018+
EVT CurrVT = TypeWorklist.pop_back_val();
3019+
unsigned CurrNumElements = CurrVT.getVectorNumElements();
3020+
if (isPowerOf2_32(CurrNumElements)) {
3021+
Cost += 1;
3022+
continue;
3023+
}
3024+
3025+
unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2;
3026+
TypeWorklist.push_back(EVT::getVectorVT(C, EltVT, PrevPow2));
3027+
TypeWorklist.push_back(
3028+
EVT::getVectorVT(C, EltVT, CurrNumElements - PrevPow2));
3029+
}
3030+
return Cost;
29983031
}
29993032

30003033
return LT.first;
Lines changed: 327 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,327 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=arm64-apple-macosx < %s | FileCheck %s
3+
4+
define void @vec3_i32(<3 x i32> %a, <3 x i32> %b, ptr %src, ptr %dst) {
5+
; CHECK-LABEL: 'vec3_i32'
6+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <3 x i32>, ptr %src, align 1
7+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <3 x i32> %l, %b
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp uge <3 x i32> %add, %a
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <3 x i32> %add, %a
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <3 x i1> %cmp, <3 x i32> %add, <3 x i32> %sub
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i32> %sel, ptr %dst, align 1
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
13+
;
14+
%l = load <3 x i32>, ptr %src, align 1
15+
%add = add <3 x i32> %l, %b
16+
%cmp = icmp uge <3 x i32> %add, %a
17+
%sub = sub <3 x i32> %add, %a
18+
%sel = select <3 x i1> %cmp, <3 x i32> %add, <3 x i32> %sub
19+
store <3 x i32> %sel, ptr %dst, align 1
20+
ret void
21+
}
22+
23+
define void @vec3_i32_default_alignment(<3 x i32> %a, <3 x i32> %b, ptr %src, ptr %dst) {
24+
; CHECK-LABEL: 'vec3_i32_default_alignment'
25+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %l = load <3 x i32>, ptr %src, align 16
26+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <3 x i32> %l, %b
27+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp uge <3 x i32> %add, %a
28+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <3 x i32> %add, %a
29+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <3 x i1> %cmp, <3 x i32> %add, <3 x i32> %sub
30+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> %sel, ptr %dst, align 16
31+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
32+
;
33+
%l = load <3 x i32>, ptr %src
34+
%add = add <3 x i32> %l, %b
35+
%cmp = icmp uge <3 x i32> %add, %a
36+
%sub = sub <3 x i32> %add, %a
37+
%sel = select <3 x i1> %cmp, <3 x i32> %add, <3 x i32> %sub
38+
store <3 x i32> %sel, ptr %dst
39+
ret void
40+
}
41+
42+
define void @vec3_i16(<3 x i16> %a, <3 x i16> %b, ptr %src, ptr %dst) {
43+
; CHECK-LABEL: 'vec3_i16'
44+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <3 x i16>, ptr %src, align 1
45+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <3 x i16> %l, %b
46+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp uge <3 x i16> %add, %a
47+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <3 x i16> %add, %a
48+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <3 x i1> %cmp, <3 x i16> %add, <3 x i16> %sub
49+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i16> %sel, ptr %dst, align 1
50+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
51+
;
52+
%l = load <3 x i16>, ptr %src, align 1
53+
%add = add <3 x i16> %l, %b
54+
%cmp = icmp uge <3 x i16> %add, %a
55+
%sub = sub <3 x i16> %add, %a
56+
%sel = select <3 x i1> %cmp, <3 x i16> %add, <3 x i16> %sub
57+
store <3 x i16> %sel, ptr %dst, align 1
58+
ret void
59+
}
60+
61+
define void @vec7_i16(ptr %src, ptr %dst) {
62+
; CHECK-LABEL: 'vec7_i16'
63+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %l = load <7 x i16>, ptr %src, align 1
64+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <7 x i16> %l, %l
65+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <7 x i16> %add, ptr %dst, align 1
66+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
67+
;
68+
%l = load <7 x i16>, ptr %src, align 1
69+
%add = add <7 x i16> %l, %l
70+
store <7 x i16> %add, ptr %dst, align 1
71+
ret void
72+
}
73+
74+
define void @vec6_i16(ptr %src, ptr %dst) {
75+
; CHECK-LABEL: 'vec6_i16'
76+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <6 x i16>, ptr %src, align 1
77+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <6 x i16> %l, %l
78+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> %add, ptr %dst, align 1
79+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
80+
;
81+
%l = load <6 x i16>, ptr %src, align 1
82+
%add = add <6 x i16> %l, %l
83+
store <6 x i16> %add, ptr %dst, align 1
84+
ret void
85+
}
86+
87+
define void @vec5_i16(ptr %src, ptr %dst) {
88+
; CHECK-LABEL: 'vec5_i16'
89+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <5 x i16>, ptr %src, align 1
90+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <5 x i16> %l, %l
91+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i16> %add, ptr %dst, align 1
92+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
93+
;
94+
%l = load <5 x i16>, ptr %src, align 1
95+
%add = add <5 x i16> %l, %l
96+
store <5 x i16> %add, ptr %dst, align 1
97+
ret void
98+
}
99+
100+
define void @vec3_i16_zext_i32(<3 x i32> %a, <3 x i32> %b, ptr %src, ptr %dst) {
101+
; CHECK-LABEL: 'vec3_i16_zext_i32'
102+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <3 x i16>, ptr %src, align 1
103+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %l.ext = zext <3 x i16> %l to <3 x i32>
104+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <3 x i32> %l.ext, %b
105+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <3 x i32> %add, %a
106+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub.trunc = trunc <3 x i32> %sub to <3 x i16>
107+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i16> %sub.trunc, ptr %dst, align 1
108+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
109+
;
110+
%l = load <3 x i16>, ptr %src, align 1
111+
%l.ext = zext <3 x i16> %l to <3 x i32>
112+
%add = add <3 x i32> %l.ext, %b
113+
%sub = sub <3 x i32> %add, %a
114+
%sub.trunc = trunc <3 x i32> %sub to <3 x i16>
115+
store <3 x i16> %sub.trunc, ptr %dst, align 1
116+
ret void
117+
}
118+
119+
define void @vec3_i8(<3 x i8> %a, <3 x i8> %b, ptr %src, ptr %dst) {
120+
; CHECK-LABEL: 'vec3_i8'
121+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %l = load <3 x i8>, ptr %src, align 1
122+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <3 x i8> %l, %b
123+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp uge <3 x i8> %add, %a
124+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <3 x i8> %add, %a
125+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sel = select <3 x i1> %cmp, <3 x i8> %add, <3 x i8> %sub
126+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <3 x i8> %sel, ptr %dst, align 1
127+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
128+
;
129+
%l = load <3 x i8>, ptr %src, align 1
130+
%add = add <3 x i8> %l, %b
131+
%cmp = icmp uge <3 x i8> %add, %a
132+
%sub = sub <3 x i8> %add, %a
133+
%sel = select <3 x i1> %cmp, <3 x i8> %add, <3 x i8> %sub
134+
store <3 x i8> %sel, ptr %dst, align 1
135+
ret void
136+
}
137+
138+
define void @vec3_i8_zext_i32(<3 x i32> %a, <3 x i32> %b, ptr %src, ptr %dst) {
139+
; CHECK-LABEL: 'vec3_i8_zext_i32'
140+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %l = load <3 x i8>, ptr %src, align 1
141+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %l.ext = zext <3 x i8> %l to <3 x i32>
142+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <3 x i32> %l.ext, %b
143+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <3 x i32> %add, %a
144+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub.trunc = trunc <3 x i32> %sub to <3 x i8>
145+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <3 x i8> %sub.trunc, ptr %dst, align 1
146+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
147+
;
148+
%l = load <3 x i8>, ptr %src, align 1
149+
%l.ext = zext <3 x i8> %l to <3 x i32>
150+
%add = add <3 x i32> %l.ext, %b
151+
%sub = sub <3 x i32> %add, %a
152+
%sub.trunc = trunc <3 x i32> %sub to <3 x i8>
153+
store <3 x i8> %sub.trunc, ptr %dst, align 1
154+
ret void
155+
}
156+
157+
define void @vec3_i8_sext_i32(<3 x i32> %a, <3 x i32> %b, ptr %src, ptr %dst) {
158+
; CHECK-LABEL: 'vec3_i8_sext_i32'
159+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %l = load <3 x i8>, ptr %src, align 1
160+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %l.ext = sext <3 x i8> %l to <3 x i32>
161+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <3 x i32> %l.ext, %b
162+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <3 x i32> %add, %a
163+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub.trunc = trunc <3 x i32> %sub to <3 x i8>
164+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <3 x i8> %sub.trunc, ptr %dst, align 1
165+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
166+
;
167+
%l = load <3 x i8>, ptr %src, align 1
168+
%l.ext = sext <3 x i8> %l to <3 x i32>
169+
%add = add <3 x i32> %l.ext, %b
170+
%sub = sub <3 x i32> %add, %a
171+
%sub.trunc = trunc <3 x i32> %sub to <3 x i8>
172+
store <3 x i8> %sub.trunc, ptr %dst, align 1
173+
ret void
174+
}
175+
176+
define void @vec3_i30(<3 x i30> %a, <3 x i30> %b, ptr %src, ptr %dst) {
177+
; CHECK-LABEL: 'vec3_i30'
178+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %l = load <3 x i30>, ptr %src, align 1
179+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <3 x i30> %l, %b
180+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <3 x i30> %add, %a
181+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <3 x i30> %sub, ptr %dst, align 1
182+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
183+
;
184+
%l = load <3 x i30>, ptr %src, align 1
185+
%add = add <3 x i30> %l, %b
186+
%sub = sub <3 x i30> %add, %a
187+
store <3 x i30> %sub, ptr %dst, align 1
188+
ret void
189+
}
190+
191+
define void @vec3_float(<3 x float> %a, <3 x float> %b, ptr %src, ptr %dst) {
192+
; CHECK-LABEL: 'vec3_float'
193+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <3 x float>, ptr %src, align 1
194+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = fadd <3 x float> %l, %b
195+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = fsub <3 x float> %add, %a
196+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x float> %sub, ptr %dst, align 1
197+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
198+
;
199+
%l = load <3 x float>, ptr %src, align 1
200+
%add = fadd <3 x float> %l, %b
201+
%sub = fsub <3 x float> %add, %a
202+
store <3 x float> %sub, ptr %dst, align 1
203+
ret void
204+
}
205+
206+
define void @vec3_half(<3 x half> %a, <3 x half> %b, ptr %src, ptr %dst) {
207+
; CHECK-LABEL: 'vec3_half'
208+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <3 x half>, ptr %src, align 1
209+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add = fadd <3 x half> %l, %b
210+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub = fsub <3 x half> %add, %a
211+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x half> %sub, ptr %dst, align 1
212+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
213+
;
214+
%l = load <3 x half>, ptr %src, align 1
215+
%add = fadd <3 x half> %l, %b
216+
%sub = fsub <3 x half> %add, %a
217+
store <3 x half> %sub, ptr %dst, align 1
218+
ret void
219+
}
220+
221+
define void @vec15_i8(ptr %src, ptr %dst) {
222+
; CHECK-LABEL: 'vec15_i8'
223+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %l = load <15 x i8>, ptr %src, align 1
224+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <15 x i8> %l, %l
225+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <15 x i8> %add, ptr %dst, align 1
226+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
227+
;
228+
%l = load <15 x i8>, ptr %src, align 1
229+
%add = add <15 x i8> %l, %l
230+
store <15 x i8> %add, ptr %dst, align 1
231+
ret void
232+
}
233+
234+
define void @vec14_i8(ptr %src, ptr %dst) {
235+
; CHECK-LABEL: 'vec14_i8'
236+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %l = load <14 x i8>, ptr %src, align 1
237+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <14 x i8> %l, %l
238+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> %add, ptr %dst, align 1
239+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
240+
;
241+
%l = load <14 x i8>, ptr %src, align 1
242+
%add = add <14 x i8> %l, %l
243+
store <14 x i8> %add, ptr %dst, align 1
244+
ret void
245+
}
246+
247+
define void @vec13_i8(ptr %src, ptr %dst) {
248+
; CHECK-LABEL: 'vec13_i8'
249+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %l = load <13 x i8>, ptr %src, align 1
250+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <13 x i8> %l, %l
251+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i8> %add, ptr %dst, align 1
252+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
253+
;
254+
%l = load <13 x i8>, ptr %src, align 1
255+
%add = add <13 x i8> %l, %l
256+
store <13 x i8> %add, ptr %dst, align 1
257+
ret void
258+
}
259+
260+
define void @vec12_i8(ptr %src, ptr %dst) {
261+
; CHECK-LABEL: 'vec12_i8'
262+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <12 x i8>, ptr %src, align 1
263+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <12 x i8> %l, %l
264+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> %add, ptr %dst, align 1
265+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
266+
;
267+
%l = load <12 x i8>, ptr %src, align 1
268+
%add = add <12 x i8> %l, %l
269+
store <12 x i8> %add, ptr %dst, align 1
270+
ret void
271+
}
272+
273+
define void @vec11_i8(ptr %src, ptr %dst) {
274+
; CHECK-LABEL: 'vec11_i8'
275+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %l = load <11 x i8>, ptr %src, align 1
276+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <11 x i8> %l, %l
277+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i8> %add, ptr %dst, align 1
278+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
279+
;
280+
%l = load <11 x i8>, ptr %src, align 1
281+
%add = add <11 x i8> %l, %l
282+
store <11 x i8> %add, ptr %dst, align 1
283+
ret void
284+
}
285+
286+
define void @vec7_i8(<7 x i8> %a, <7 x i8> %b, ptr %src, ptr %dst) {
287+
; CHECK-LABEL: 'vec7_i8'
288+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %l = load <7 x i8>, ptr %src, align 1
289+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <7 x i8> %l, %b
290+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub = sub <7 x i8> %add, %a
291+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <7 x i8> %sub, ptr %dst, align 1
292+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
293+
;
294+
%l = load <7 x i8>, ptr %src, align 1
295+
%add = add <7 x i8> %l, %b
296+
%sub = sub <7 x i8> %add, %a
297+
store <7 x i8> %sub, ptr %dst, align 1
298+
ret void
299+
}
300+
301+
define void @vec6_i8(ptr %src, ptr %dst) {
302+
; CHECK-LABEL: 'vec6_i8'
303+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <6 x i8>, ptr %src, align 1
304+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <6 x i8> %l, %l
305+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> %add, ptr %dst, align 1
306+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
307+
;
308+
%l = load <6 x i8>, ptr %src, align 1
309+
%add = add <6 x i8> %l, %l
310+
store <6 x i8> %add, ptr %dst, align 1
311+
ret void
312+
}
313+
314+
define void @vec5_i8(ptr %src, ptr %dst) {
315+
; CHECK-LABEL: 'vec5_i8'
316+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %l = load <5 x i8>, ptr %src, align 1
317+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add = add <5 x i8> %l, %l
318+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i8> %add, ptr %dst, align 1
319+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
320+
;
321+
%l = load <5 x i8>, ptr %src, align 1
322+
%add = add <5 x i8> %l, %l
323+
store <5 x i8> %add, ptr %dst, align 1
324+
ret void
325+
}
326+
327+

0 commit comments

Comments
 (0)