Skip to content

Commit cc82f12

Browse files
authored
[AArch64] Update latencies for Cortex-A510 scheduling model (#87293)
Updated according to the Software Optimization Guide for Arm® Cortex®‑A510 Core Revision: r1p3 Issue 6.0.
1 parent 58a08e1 commit cc82f12

File tree

209 files changed

+6488
-6481
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

209 files changed

+6488
-6481
lines changed

llvm/lib/Target/AArch64/AArch64SchedA510.td

Lines changed: 75 additions & 70 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -140,10 +140,10 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
140140
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
141141
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
142142
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
143+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
143144
; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
144145
; GISEL-NEXT: usra v1.8h, v0.8h, #1
145-
; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
146-
; GISEL-NEXT: neg v0.8h, v0.8h
146+
; GISEL-NEXT: neg v0.8h, v2.8h
147147
; GISEL-NEXT: ushl v0.8h, v1.8h, v0.8h
148148
; GISEL-NEXT: ret
149149
%1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
@@ -170,13 +170,13 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
170170
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
171171
; GISEL: // %bb.0:
172172
; GISEL-NEXT: adrp x8, .LCPI4_2
173+
; GISEL-NEXT: adrp x9, .LCPI4_0
173174
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
174175
; GISEL-NEXT: adrp x8, .LCPI4_1
176+
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
175177
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1]
176-
; GISEL-NEXT: adrp x8, .LCPI4_0
177178
; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
178179
; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
179-
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_0]
180180
; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
181181
; GISEL-NEXT: neg v2.16b, v3.16b
182182
; GISEL-NEXT: shl v3.16b, v4.16b, #7

llvm/test/CodeGen/AArch64/aarch64-addv.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -101,12 +101,12 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias
101101
; GISEL-NEXT: ushll v2.8h, v2.8b, #0
102102
; GISEL-NEXT: usubl v3.4s, v1.4h, v2.4h
103103
; GISEL-NEXT: usubl2 v1.4s, v1.8h, v2.8h
104-
; GISEL-NEXT: neg v2.4s, v3.4s
105-
; GISEL-NEXT: neg v4.4s, v1.4s
106-
; GISEL-NEXT: cmgt v5.4s, v0.4s, v3.4s
104+
; GISEL-NEXT: cmgt v2.4s, v0.4s, v3.4s
107105
; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s
108-
; GISEL-NEXT: bif v2.16b, v3.16b, v5.16b
109-
; GISEL-NEXT: bsl v0.16b, v4.16b, v1.16b
106+
; GISEL-NEXT: neg v4.4s, v3.4s
107+
; GISEL-NEXT: neg v5.4s, v1.4s
108+
; GISEL-NEXT: bsl v2.16b, v4.16b, v3.16b
109+
; GISEL-NEXT: bsl v0.16b, v5.16b, v1.16b
110110
; GISEL-NEXT: add v0.4s, v2.4s, v0.4s
111111
; GISEL-NEXT: addv s0, v0.4s
112112
; GISEL-NEXT: fmov w0, s0

llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@
44
define <vscale x 2 x i16> @dupsext_v2i8_v2i16(i8 %src, <vscale x 2 x i16> %b) {
55
; CHECK-LABEL: dupsext_v2i8_v2i16:
66
; CHECK: // %bb.0: // %entry
7-
; CHECK-NEXT: ptrue p0.d
87
; CHECK-NEXT: sxtb w8, w0
8+
; CHECK-NEXT: ptrue p0.d
99
; CHECK-NEXT: mov z1.d, x8
1010
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
1111
; CHECK-NEXT: ret
@@ -20,8 +20,8 @@ entry:
2020
define <vscale x 4 x i16> @dupsext_v4i8_v4i16(i8 %src, <vscale x 4 x i16> %b) {
2121
; CHECK-LABEL: dupsext_v4i8_v4i16:
2222
; CHECK: // %bb.0: // %entry
23-
; CHECK-NEXT: ptrue p0.s
2423
; CHECK-NEXT: sxtb w8, w0
24+
; CHECK-NEXT: ptrue p0.s
2525
; CHECK-NEXT: mov z1.s, w8
2626
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
2727
; CHECK-NEXT: ret
@@ -36,8 +36,8 @@ entry:
3636
define <vscale x 8 x i16> @dupsext_v8i8_v8i16(i8 %src, <vscale x 8 x i16> %b) {
3737
; CHECK-LABEL: dupsext_v8i8_v8i16:
3838
; CHECK: // %bb.0: // %entry
39-
; CHECK-NEXT: ptrue p0.h
4039
; CHECK-NEXT: sxtb w8, w0
40+
; CHECK-NEXT: ptrue p0.h
4141
; CHECK-NEXT: mov z1.h, w8
4242
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
4343
; CHECK-NEXT: ret
@@ -52,8 +52,8 @@ entry:
5252
define <vscale x 2 x i32> @dupsext_v2i8_v2i32(i8 %src, <vscale x 2 x i32> %b) {
5353
; CHECK-LABEL: dupsext_v2i8_v2i32:
5454
; CHECK: // %bb.0: // %entry
55-
; CHECK-NEXT: ptrue p0.d
5655
; CHECK-NEXT: sxtb w8, w0
56+
; CHECK-NEXT: ptrue p0.d
5757
; CHECK-NEXT: mov z1.d, x8
5858
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
5959
; CHECK-NEXT: ret
@@ -68,8 +68,8 @@ entry:
6868
define <vscale x 4 x i32> @dupsext_v4i8_v4i32(i8 %src, <vscale x 4 x i32> %b) {
6969
; CHECK-LABEL: dupsext_v4i8_v4i32:
7070
; CHECK: // %bb.0: // %entry
71-
; CHECK-NEXT: ptrue p0.s
7271
; CHECK-NEXT: sxtb w8, w0
72+
; CHECK-NEXT: ptrue p0.s
7373
; CHECK-NEXT: mov z1.s, w8
7474
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
7575
; CHECK-NEXT: ret
@@ -84,9 +84,9 @@ entry:
8484
define <vscale x 2 x i64> @dupsext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
8585
; CHECK-LABEL: dupsext_v2i8_v2i64:
8686
; CHECK: // %bb.0: // %entry
87-
; CHECK-NEXT: ptrue p0.d
8887
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
8988
; CHECK-NEXT: sxtb x8, w0
89+
; CHECK-NEXT: ptrue p0.d
9090
; CHECK-NEXT: mov z1.d, x8
9191
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
9292
; CHECK-NEXT: ret
@@ -101,8 +101,8 @@ entry:
101101
define <vscale x 2 x i32> @dupsext_v2i16_v2i32(i16 %src, <vscale x 2 x i32> %b) {
102102
; CHECK-LABEL: dupsext_v2i16_v2i32:
103103
; CHECK: // %bb.0: // %entry
104-
; CHECK-NEXT: ptrue p0.d
105104
; CHECK-NEXT: sxth w8, w0
105+
; CHECK-NEXT: ptrue p0.d
106106
; CHECK-NEXT: mov z1.d, x8
107107
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
108108
; CHECK-NEXT: ret
@@ -117,8 +117,8 @@ entry:
117117
define <vscale x 4 x i32> @dupsext_v4i16_v4i32(i16 %src, <vscale x 4 x i32> %b) {
118118
; CHECK-LABEL: dupsext_v4i16_v4i32:
119119
; CHECK: // %bb.0: // %entry
120-
; CHECK-NEXT: ptrue p0.s
121120
; CHECK-NEXT: sxth w8, w0
121+
; CHECK-NEXT: ptrue p0.s
122122
; CHECK-NEXT: mov z1.s, w8
123123
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
124124
; CHECK-NEXT: ret
@@ -133,9 +133,9 @@ entry:
133133
define <vscale x 2 x i64> @dupsext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
134134
; CHECK-LABEL: dupsext_v2i16_v2i64:
135135
; CHECK: // %bb.0: // %entry
136-
; CHECK-NEXT: ptrue p0.d
137136
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
138137
; CHECK-NEXT: sxth x8, w0
138+
; CHECK-NEXT: ptrue p0.d
139139
; CHECK-NEXT: mov z1.d, x8
140140
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
141141
; CHECK-NEXT: ret
@@ -150,9 +150,9 @@ entry:
150150
define <vscale x 2 x i64> @dupsext_v2i32_v2i64(i32 %src, <vscale x 2 x i64> %b) {
151151
; CHECK-LABEL: dupsext_v2i32_v2i64:
152152
; CHECK: // %bb.0: // %entry
153-
; CHECK-NEXT: ptrue p0.d
154153
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
155154
; CHECK-NEXT: sxtw x8, w0
155+
; CHECK-NEXT: ptrue p0.d
156156
; CHECK-NEXT: mov z1.d, x8
157157
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
158158
; CHECK-NEXT: ret
@@ -167,8 +167,8 @@ entry:
167167
define <vscale x 2 x i16> @dupzext_v2i8_v2i16(i8 %src, <vscale x 2 x i16> %b) {
168168
; CHECK-LABEL: dupzext_v2i8_v2i16:
169169
; CHECK: // %bb.0: // %entry
170-
; CHECK-NEXT: ptrue p0.d
171170
; CHECK-NEXT: and w8, w0, #0xff
171+
; CHECK-NEXT: ptrue p0.d
172172
; CHECK-NEXT: mov z1.d, x8
173173
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
174174
; CHECK-NEXT: ret
@@ -183,8 +183,8 @@ entry:
183183
define <vscale x 4 x i16> @dupzext_v4i8_v4i16(i8 %src, <vscale x 4 x i16> %b) {
184184
; CHECK-LABEL: dupzext_v4i8_v4i16:
185185
; CHECK: // %bb.0: // %entry
186-
; CHECK-NEXT: ptrue p0.s
187186
; CHECK-NEXT: and w8, w0, #0xff
187+
; CHECK-NEXT: ptrue p0.s
188188
; CHECK-NEXT: mov z1.s, w8
189189
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
190190
; CHECK-NEXT: ret
@@ -199,8 +199,8 @@ entry:
199199
define <vscale x 8 x i16> @dupzext_v8i8_v8i16(i8 %src, <vscale x 8 x i16> %b) {
200200
; CHECK-LABEL: dupzext_v8i8_v8i16:
201201
; CHECK: // %bb.0: // %entry
202-
; CHECK-NEXT: ptrue p0.h
203202
; CHECK-NEXT: and w8, w0, #0xff
203+
; CHECK-NEXT: ptrue p0.h
204204
; CHECK-NEXT: mov z1.h, w8
205205
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
206206
; CHECK-NEXT: ret
@@ -215,8 +215,8 @@ entry:
215215
define <vscale x 2 x i32> @dupzext_v2i8_v2i32(i8 %src, <vscale x 2 x i32> %b) {
216216
; CHECK-LABEL: dupzext_v2i8_v2i32:
217217
; CHECK: // %bb.0: // %entry
218-
; CHECK-NEXT: ptrue p0.d
219218
; CHECK-NEXT: and w8, w0, #0xff
219+
; CHECK-NEXT: ptrue p0.d
220220
; CHECK-NEXT: mov z1.d, x8
221221
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
222222
; CHECK-NEXT: ret
@@ -231,8 +231,8 @@ entry:
231231
define <vscale x 4 x i32> @dupzext_v4i8_v4i32(i8 %src, <vscale x 4 x i32> %b) {
232232
; CHECK-LABEL: dupzext_v4i8_v4i32:
233233
; CHECK: // %bb.0: // %entry
234-
; CHECK-NEXT: ptrue p0.s
235234
; CHECK-NEXT: and w8, w0, #0xff
235+
; CHECK-NEXT: ptrue p0.s
236236
; CHECK-NEXT: mov z1.s, w8
237237
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
238238
; CHECK-NEXT: ret
@@ -247,9 +247,9 @@ entry:
247247
define <vscale x 2 x i64> @dupzext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
248248
; CHECK-LABEL: dupzext_v2i8_v2i64:
249249
; CHECK: // %bb.0: // %entry
250-
; CHECK-NEXT: ptrue p0.d
251250
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
252251
; CHECK-NEXT: and x8, x0, #0xff
252+
; CHECK-NEXT: ptrue p0.d
253253
; CHECK-NEXT: mov z1.d, x8
254254
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
255255
; CHECK-NEXT: ret
@@ -264,8 +264,8 @@ entry:
264264
define <vscale x 2 x i32> @dupzext_v2i16_v2i32(i16 %src, <vscale x 2 x i32> %b) {
265265
; CHECK-LABEL: dupzext_v2i16_v2i32:
266266
; CHECK: // %bb.0: // %entry
267-
; CHECK-NEXT: ptrue p0.d
268267
; CHECK-NEXT: and w8, w0, #0xffff
268+
; CHECK-NEXT: ptrue p0.d
269269
; CHECK-NEXT: mov z1.d, x8
270270
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
271271
; CHECK-NEXT: ret
@@ -280,8 +280,8 @@ entry:
280280
define <vscale x 4 x i32> @dupzext_v4i16_v4i32(i16 %src, <vscale x 4 x i32> %b) {
281281
; CHECK-LABEL: dupzext_v4i16_v4i32:
282282
; CHECK: // %bb.0: // %entry
283-
; CHECK-NEXT: ptrue p0.s
284283
; CHECK-NEXT: and w8, w0, #0xffff
284+
; CHECK-NEXT: ptrue p0.s
285285
; CHECK-NEXT: mov z1.s, w8
286286
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
287287
; CHECK-NEXT: ret
@@ -296,9 +296,9 @@ entry:
296296
define <vscale x 2 x i64> @dupzext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
297297
; CHECK-LABEL: dupzext_v2i16_v2i64:
298298
; CHECK: // %bb.0: // %entry
299-
; CHECK-NEXT: ptrue p0.d
300299
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
301300
; CHECK-NEXT: and x8, x0, #0xffff
301+
; CHECK-NEXT: ptrue p0.d
302302
; CHECK-NEXT: mov z1.d, x8
303303
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
304304
; CHECK-NEXT: ret
@@ -313,8 +313,8 @@ entry:
313313
define <vscale x 2 x i64> @dupzext_v2i32_v2i64(i32 %src, <vscale x 2 x i64> %b) {
314314
; CHECK-LABEL: dupzext_v2i32_v2i64:
315315
; CHECK: // %bb.0: // %entry
316-
; CHECK-NEXT: ptrue p0.d
317316
; CHECK-NEXT: mov w8, w0
317+
; CHECK-NEXT: ptrue p0.d
318318
; CHECK-NEXT: mov z1.d, x8
319319
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
320320
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/aarch64-smull.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -257,8 +257,8 @@ define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind {
257257
; CHECK-SVE-LABEL: smull_zext_v2i32_v2i64:
258258
; CHECK-SVE: // %bb.0:
259259
; CHECK-SVE-NEXT: ldrh w8, [x0]
260-
; CHECK-SVE-NEXT: ptrue p0.d, vl2
261260
; CHECK-SVE-NEXT: ldrh w9, [x0, #2]
261+
; CHECK-SVE-NEXT: ptrue p0.d, vl2
262262
; CHECK-SVE-NEXT: ldr d0, [x1]
263263
; CHECK-SVE-NEXT: fmov d1, x8
264264
; CHECK-SVE-NEXT: sshll v0.2d, v0.2s, #0

0 commit comments

Comments
 (0)