Skip to content

Commit 1dfeb67

Browse files
committed
modify tests
1 parent 6f5b0dc commit 1dfeb67

12 files changed

+2371
-738
lines changed

llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll

Lines changed: 67 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,28 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
146146
; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32:
147147
; CHECK: ; %bb.0: ; %entry
148148
; CHECK-NEXT: movi.2d v0, #0000000000000000
149+
; CHECK-NEXT: movi.2d v2, #0x000000ffffffff
149150
; CHECK-NEXT: uaddlv.4s d1, v0
150151
; CHECK-NEXT: mov.d v0[0], v1[0]
151-
; CHECK-NEXT: movi.2d v1, #0000000000000000
152-
; CHECK-NEXT: ucvtf.2d v0, v0
153-
; CHECK-NEXT: str d1, [x0, #16]
154-
; CHECK-NEXT: fcvtn v0.2s, v0.2d
155-
; CHECK-NEXT: str q0, [x0]
152+
; CHECK-NEXT: ushr.2d v1, v0, #32
153+
; CHECK-NEXT: and.16b v0, v0, v2
154+
; CHECK-NEXT: mov.d x8, v1[1]
155+
; CHECK-NEXT: fmov x9, d1
156+
; CHECK-NEXT: scvtf s2, x9
157+
; CHECK-NEXT: mov w9, #1333788672 ; =0x4f800000
158+
; CHECK-NEXT: scvtf s1, x8
159+
; CHECK-NEXT: mov.d x8, v0[1]
160+
; CHECK-NEXT: dup.2s v3, w9
161+
; CHECK-NEXT: fmov x9, d0
162+
; CHECK-NEXT: scvtf s0, x8
163+
; CHECK-NEXT: mov.s v2[1], v1[0]
164+
; CHECK-NEXT: scvtf s1, x9
165+
; CHECK-NEXT: fmul.2s v2, v2, v3
166+
; CHECK-NEXT: mov.s v1[1], v0[0]
167+
; CHECK-NEXT: movi.2d v0, #0000000000000000
168+
; CHECK-NEXT: fadd.2s v1, v2, v1
169+
; CHECK-NEXT: str d0, [x0, #16]
170+
; CHECK-NEXT: str q1, [x0]
156171
; CHECK-NEXT: ret
157172

158173
entry:
@@ -167,10 +182,25 @@ define void @insert_vec_v2i64_uaddlv_from_v4i32(ptr %0) {
167182
; CHECK-LABEL: insert_vec_v2i64_uaddlv_from_v4i32:
168183
; CHECK: ; %bb.0: ; %entry
169184
; CHECK-NEXT: movi.2d v0, #0000000000000000
185+
; CHECK-NEXT: movi.2d v2, #0x000000ffffffff
170186
; CHECK-NEXT: uaddlv.4s d1, v0
171187
; CHECK-NEXT: mov.d v0[0], v1[0]
172-
; CHECK-NEXT: ucvtf.2d v0, v0
173-
; CHECK-NEXT: fcvtn v0.2s, v0.2d
188+
; CHECK-NEXT: ushr.2d v1, v0, #32
189+
; CHECK-NEXT: and.16b v0, v0, v2
190+
; CHECK-NEXT: mov.d x8, v1[1]
191+
; CHECK-NEXT: fmov x9, d1
192+
; CHECK-NEXT: scvtf s2, x9
193+
; CHECK-NEXT: mov w9, #1333788672 ; =0x4f800000
194+
; CHECK-NEXT: scvtf s1, x8
195+
; CHECK-NEXT: mov.d x8, v0[1]
196+
; CHECK-NEXT: dup.2s v3, w9
197+
; CHECK-NEXT: fmov x9, d0
198+
; CHECK-NEXT: scvtf s0, x8
199+
; CHECK-NEXT: mov.s v2[1], v1[0]
200+
; CHECK-NEXT: scvtf s1, x9
201+
; CHECK-NEXT: fmul.2s v2, v2, v3
202+
; CHECK-NEXT: mov.s v1[1], v0[0]
203+
; CHECK-NEXT: fadd.2s v0, v2, v1
174204
; CHECK-NEXT: str d0, [x0]
175205
; CHECK-NEXT: ret
176206

@@ -186,11 +216,26 @@ define void @insert_vec_v5i64_uaddlv_from_v4i32(ptr %0) {
186216
; CHECK-LABEL: insert_vec_v5i64_uaddlv_from_v4i32:
187217
; CHECK: ; %bb.0: ; %entry
188218
; CHECK-NEXT: movi.2d v0, #0000000000000000
219+
; CHECK-NEXT: movi.2d v2, #0x000000ffffffff
189220
; CHECK-NEXT: str wzr, [x0, #16]
190221
; CHECK-NEXT: uaddlv.4s d1, v0
191222
; CHECK-NEXT: mov.d v0[0], v1[0]
192-
; CHECK-NEXT: ucvtf.2d v0, v0
193-
; CHECK-NEXT: fcvtn v0.2s, v0.2d
223+
; CHECK-NEXT: ushr.2d v1, v0, #32
224+
; CHECK-NEXT: and.16b v0, v0, v2
225+
; CHECK-NEXT: mov.d x8, v1[1]
226+
; CHECK-NEXT: fmov x9, d1
227+
; CHECK-NEXT: scvtf s2, x9
228+
; CHECK-NEXT: mov w9, #1333788672 ; =0x4f800000
229+
; CHECK-NEXT: scvtf s1, x8
230+
; CHECK-NEXT: mov.d x8, v0[1]
231+
; CHECK-NEXT: dup.2s v3, w9
232+
; CHECK-NEXT: fmov x9, d0
233+
; CHECK-NEXT: scvtf s0, x8
234+
; CHECK-NEXT: mov.s v2[1], v1[0]
235+
; CHECK-NEXT: scvtf s1, x9
236+
; CHECK-NEXT: fmul.2s v2, v2, v3
237+
; CHECK-NEXT: mov.s v1[1], v0[0]
238+
; CHECK-NEXT: fadd.2s v0, v2, v1
194239
; CHECK-NEXT: str q0, [x0]
195240
; CHECK-NEXT: ret
196241

@@ -251,12 +296,20 @@ define void @insert_vec_v16i64_uaddlv_from_v4i16(ptr %0) {
251296
; CHECK-LABEL: insert_vec_v16i64_uaddlv_from_v4i16:
252297
; CHECK: ; %bb.0: ; %entry
253298
; CHECK-NEXT: movi.2d v0, #0000000000000000
254-
; CHECK-NEXT: movi.2d v2, #0000000000000000
255-
; CHECK-NEXT: uaddlv.4h s1, v0
299+
; CHECK-NEXT: movi.2d v1, #0000000000000000
300+
; CHECK-NEXT: mov w9, #1333788672 ; =0x4f800000
301+
; CHECK-NEXT: scvtf s3, xzr
302+
; CHECK-NEXT: dup.2s v4, w9
303+
; CHECK-NEXT: uaddlv.4h s2, v0
256304
; CHECK-NEXT: stp q0, q0, [x0, #32]
257-
; CHECK-NEXT: mov.s v2[0], v1[0]
258-
; CHECK-NEXT: ucvtf.2d v1, v2
259-
; CHECK-NEXT: fcvtn v1.2s, v1.2d
305+
; CHECK-NEXT: mov.s v1[0], v2[0]
306+
; CHECK-NEXT: mov.d x8, v1[1]
307+
; CHECK-NEXT: scvtf s2, x8
308+
; CHECK-NEXT: fmov x8, d1
309+
; CHECK-NEXT: scvtf s1, x8
310+
; CHECK-NEXT: mov.s v1[1], v2[0]
311+
; CHECK-NEXT: fmul.2s v2, v4, v3[0]
312+
; CHECK-NEXT: fadd.2s v1, v2, v1
260313
; CHECK-NEXT: stp q1, q0, [x0]
261314
; CHECK-NEXT: ret
262315

llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,40 @@ define <4 x half> @uitofp_v4i64_to_v4f16(ptr %ptr) {
5353
define <4 x bfloat> @uitofp_v4i64_to_v4bf16(ptr %ptr) {
5454
; CHECK-LABEL: uitofp_v4i64_to_v4bf16:
5555
; CHECK: // %bb.0:
56-
; CHECK-NEXT: ldp q0, q1, [x0]
56+
; CHECK-NEXT: ldp q1, q3, [x0]
57+
; CHECK-NEXT: movi v0.2d, #0x000000ffffffff
58+
; CHECK-NEXT: ushr v2.2d, v1.2d, #32
59+
; CHECK-NEXT: ushr v5.2d, v3.2d, #32
60+
; CHECK-NEXT: and v1.16b, v1.16b, v0.16b
61+
; CHECK-NEXT: and v0.16b, v3.16b, v0.16b
62+
; CHECK-NEXT: mov x8, v2.d[1]
63+
; CHECK-NEXT: fmov x10, d2
64+
; CHECK-NEXT: mov x9, v1.d[1]
65+
; CHECK-NEXT: scvtf s4, x10
66+
; CHECK-NEXT: scvtf s2, x8
67+
; CHECK-NEXT: fmov x8, d1
68+
; CHECK-NEXT: scvtf s1, x9
69+
; CHECK-NEXT: mov x9, v5.d[1]
70+
; CHECK-NEXT: scvtf s3, x8
71+
; CHECK-NEXT: fmov x8, d5
72+
; CHECK-NEXT: mov v4.s[1], v2.s[0]
73+
; CHECK-NEXT: scvtf s2, x8
74+
; CHECK-NEXT: fmov x8, d0
75+
; CHECK-NEXT: mov v3.s[1], v1.s[0]
76+
; CHECK-NEXT: scvtf s1, x8
77+
; CHECK-NEXT: mov x8, v0.d[1]
78+
; CHECK-NEXT: scvtf s0, x9
79+
; CHECK-NEXT: mov v4.s[2], v2.s[0]
5780
; CHECK-NEXT: movi v2.4s, #127, msl #8
58-
; CHECK-NEXT: ucvtf v0.2d, v0.2d
59-
; CHECK-NEXT: ucvtf v1.2d, v1.2d
60-
; CHECK-NEXT: fcvtn v0.2s, v0.2d
61-
; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
81+
; CHECK-NEXT: mov v3.s[2], v1.s[0]
82+
; CHECK-NEXT: scvtf s1, x8
83+
; CHECK-NEXT: mov w8, #1333788672 // =0x4f800000
84+
; CHECK-NEXT: mov v4.s[3], v0.s[0]
85+
; CHECK-NEXT: dup v0.4s, w8
86+
; CHECK-NEXT: mov v3.s[3], v1.s[0]
6287
; CHECK-NEXT: movi v1.4s, #1
88+
; CHECK-NEXT: fmul v0.4s, v4.4s, v0.4s
89+
; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s
6390
; CHECK-NEXT: ushr v3.4s, v0.4s, #16
6491
; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
6592
; CHECK-NEXT: and v1.16b, v3.16b, v1.16b

llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll

Lines changed: 95 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -310,29 +310,43 @@ define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
310310
define <4 x bfloat> @sitofp_i64(<4 x i64> %a) #0 {
311311
; CHECK-CVT-LABEL: sitofp_i64:
312312
; CHECK-CVT: // %bb.0:
313-
; CHECK-CVT-NEXT: scvtf v0.2d, v0.2d
314-
; CHECK-CVT-NEXT: scvtf v1.2d, v1.2d
315-
; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
316-
; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
317-
; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
318-
; CHECK-CVT-NEXT: movi v1.4s, #1
319-
; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
320-
; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
321-
; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
322-
; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
323-
; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
324-
; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
325-
; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
313+
; CHECK-CVT-NEXT: mov x8, v0.d[1]
314+
; CHECK-CVT-NEXT: fmov x9, d0
315+
; CHECK-CVT-NEXT: scvtf s2, x9
316+
; CHECK-CVT-NEXT: mov x9, v1.d[1]
317+
; CHECK-CVT-NEXT: scvtf s0, x8
318+
; CHECK-CVT-NEXT: fmov x8, d1
319+
; CHECK-CVT-NEXT: scvtf s1, x8
320+
; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
321+
; CHECK-CVT-NEXT: scvtf s0, x9
322+
; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
323+
; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
324+
; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
325+
; CHECK-CVT-NEXT: movi v0.4s, #1
326+
; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
327+
; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
328+
; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
329+
; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
330+
; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
331+
; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
332+
; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
326333
; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
327334
; CHECK-CVT-NEXT: ret
328335
;
329336
; CHECK-BF16-LABEL: sitofp_i64:
330337
; CHECK-BF16: // %bb.0:
331-
; CHECK-BF16-NEXT: scvtf v0.2d, v0.2d
332-
; CHECK-BF16-NEXT: scvtf v1.2d, v1.2d
333-
; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
334-
; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
335-
; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
338+
; CHECK-BF16-NEXT: mov x8, v0.d[1]
339+
; CHECK-BF16-NEXT: fmov x9, d0
340+
; CHECK-BF16-NEXT: scvtf s2, x9
341+
; CHECK-BF16-NEXT: mov x9, v1.d[1]
342+
; CHECK-BF16-NEXT: scvtf s0, x8
343+
; CHECK-BF16-NEXT: fmov x8, d1
344+
; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
345+
; CHECK-BF16-NEXT: scvtf s0, x8
346+
; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
347+
; CHECK-BF16-NEXT: scvtf s0, x9
348+
; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
349+
; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
336350
; CHECK-BF16-NEXT: ret
337351
%1 = sitofp <4 x i64> %a to <4 x bfloat>
338352
ret <4 x bfloat> %1
@@ -413,12 +427,39 @@ define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
413427
define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
414428
; CHECK-CVT-LABEL: uitofp_i64:
415429
; CHECK-CVT: // %bb.0:
416-
; CHECK-CVT-NEXT: ucvtf v0.2d, v0.2d
417-
; CHECK-CVT-NEXT: ucvtf v1.2d, v1.2d
418-
; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
419-
; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
420-
; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
430+
; CHECK-CVT-NEXT: movi v2.2d, #0x000000ffffffff
431+
; CHECK-CVT-NEXT: ushr v3.2d, v0.2d, #32
432+
; CHECK-CVT-NEXT: ushr v4.2d, v1.2d, #32
433+
; CHECK-CVT-NEXT: mov x8, v3.d[1]
434+
; CHECK-CVT-NEXT: fmov x10, d3
435+
; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
436+
; CHECK-CVT-NEXT: and v1.16b, v1.16b, v2.16b
437+
; CHECK-CVT-NEXT: scvtf s3, x10
438+
; CHECK-CVT-NEXT: scvtf s5, x8
439+
; CHECK-CVT-NEXT: fmov x8, d0
440+
; CHECK-CVT-NEXT: mov x9, v0.d[1]
441+
; CHECK-CVT-NEXT: scvtf s2, x8
442+
; CHECK-CVT-NEXT: fmov x8, d4
443+
; CHECK-CVT-NEXT: scvtf s0, x9
444+
; CHECK-CVT-NEXT: mov x9, v4.d[1]
445+
; CHECK-CVT-NEXT: mov v3.s[1], v5.s[0]
446+
; CHECK-CVT-NEXT: scvtf s4, x8
447+
; CHECK-CVT-NEXT: fmov x8, d1
448+
; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
449+
; CHECK-CVT-NEXT: scvtf s0, x8
450+
; CHECK-CVT-NEXT: mov x8, v1.d[1]
451+
; CHECK-CVT-NEXT: scvtf s1, x9
452+
; CHECK-CVT-NEXT: mov v3.s[2], v4.s[0]
453+
; CHECK-CVT-NEXT: mov v2.s[2], v0.s[0]
454+
; CHECK-CVT-NEXT: scvtf s0, x8
455+
; CHECK-CVT-NEXT: mov w8, #1333788672 // =0x4f800000
456+
; CHECK-CVT-NEXT: mov v3.s[3], v1.s[0]
457+
; CHECK-CVT-NEXT: dup v1.4s, w8
458+
; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
459+
; CHECK-CVT-NEXT: fmul v0.4s, v3.4s, v1.4s
421460
; CHECK-CVT-NEXT: movi v1.4s, #1
461+
; CHECK-CVT-NEXT: fadd v0.4s, v0.4s, v2.4s
462+
; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
422463
; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
423464
; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
424465
; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
@@ -431,10 +472,37 @@ define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
431472
;
432473
; CHECK-BF16-LABEL: uitofp_i64:
433474
; CHECK-BF16: // %bb.0:
434-
; CHECK-BF16-NEXT: ucvtf v0.2d, v0.2d
435-
; CHECK-BF16-NEXT: ucvtf v1.2d, v1.2d
436-
; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
437-
; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
475+
; CHECK-BF16-NEXT: movi v2.2d, #0x000000ffffffff
476+
; CHECK-BF16-NEXT: ushr v3.2d, v0.2d, #32
477+
; CHECK-BF16-NEXT: ushr v4.2d, v1.2d, #32
478+
; CHECK-BF16-NEXT: mov x8, v3.d[1]
479+
; CHECK-BF16-NEXT: fmov x10, d3
480+
; CHECK-BF16-NEXT: and v0.16b, v0.16b, v2.16b
481+
; CHECK-BF16-NEXT: and v1.16b, v1.16b, v2.16b
482+
; CHECK-BF16-NEXT: scvtf s3, x10
483+
; CHECK-BF16-NEXT: scvtf s5, x8
484+
; CHECK-BF16-NEXT: fmov x8, d0
485+
; CHECK-BF16-NEXT: mov x9, v0.d[1]
486+
; CHECK-BF16-NEXT: scvtf s2, x8
487+
; CHECK-BF16-NEXT: fmov x8, d4
488+
; CHECK-BF16-NEXT: scvtf s0, x9
489+
; CHECK-BF16-NEXT: mov x9, v4.d[1]
490+
; CHECK-BF16-NEXT: mov v3.s[1], v5.s[0]
491+
; CHECK-BF16-NEXT: scvtf s4, x8
492+
; CHECK-BF16-NEXT: fmov x8, d1
493+
; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
494+
; CHECK-BF16-NEXT: scvtf s0, x8
495+
; CHECK-BF16-NEXT: mov x8, v1.d[1]
496+
; CHECK-BF16-NEXT: scvtf s1, x9
497+
; CHECK-BF16-NEXT: mov v3.s[2], v4.s[0]
498+
; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
499+
; CHECK-BF16-NEXT: scvtf s0, x8
500+
; CHECK-BF16-NEXT: mov w8, #1333788672 // =0x4f800000
501+
; CHECK-BF16-NEXT: mov v3.s[3], v1.s[0]
502+
; CHECK-BF16-NEXT: dup v1.4s, w8
503+
; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
504+
; CHECK-BF16-NEXT: fmul v0.4s, v3.4s, v1.4s
505+
; CHECK-BF16-NEXT: fadd v0.4s, v0.4s, v2.4s
438506
; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
439507
; CHECK-BF16-NEXT: ret
440508
%1 = uitofp <4 x i64> %a to <4 x bfloat>

0 commit comments

Comments
 (0)