Skip to content

Commit f3d9a54

Browse files
committed
Fix up tests
1 parent 7e1b69d commit f3d9a54

File tree

2 files changed: 155 additions (+155) and 87 deletions (-87)

2 files changed

+155
-87
lines changed

llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll

Lines changed: 56 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -423,45 +423,45 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
423423
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
424424
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
425425
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
426-
; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
426+
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.h, z2.b
427+
; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b
427428
; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z3.b
428-
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
429429
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
430430
; CHECK-NEWLOWERING-NEXT: ptrue p0.d
431431
; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
432+
; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z2.h
433+
; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z5.h
434+
; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
432435
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
433-
; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z5.h
434-
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
435-
; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
436436
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
437-
; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
438437
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
439-
; CHECK-NEWLOWERING-NEXT: uunpkhi z26.d, z6.s
440-
; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z6.s
441-
; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z4.s
442-
; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z7.s
443-
; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z5.s
438+
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
439+
; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z6.s
440+
; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z7.s
441+
; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
442+
; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z24.s
443+
; CHECK-NEWLOWERING-NEXT: sunpkhi z24.d, z24.s
444+
; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z6.s
445+
; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z25.s
446+
; CHECK-NEWLOWERING-NEXT: sunpkhi z25.d, z25.s
447+
; CHECK-NEWLOWERING-NEXT: uunpklo z30.d, z4.s
444448
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
445-
; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
449+
; CHECK-NEWLOWERING-NEXT: uunpklo z31.d, z2.s
450+
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
451+
; CHECK-NEWLOWERING-NEXT: sunpklo z8.d, z3.s
452+
; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z5.s
446453
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
447-
; CHECK-NEWLOWERING-NEXT: uunpkhi z30.d, z24.s
448-
; CHECK-NEWLOWERING-NEXT: uunpkhi z31.d, z2.s
449-
; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z24.s
450-
; CHECK-NEWLOWERING-NEXT: uunpklo z2.d, z2.s
451-
; CHECK-NEWLOWERING-NEXT: sunpkhi z8.d, z25.s
452-
; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z25.s
453-
; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z3.s
454-
; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
455-
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
454+
; CHECK-NEWLOWERING-NEXT: mul z7.d, z7.d, z24.d
455+
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z28.d
456456
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
457-
; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
458-
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
459-
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
460-
; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
461-
; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
457+
; CHECK-NEWLOWERING-NEXT: mul z6.d, z6.d, z25.d
458+
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z29.d
459+
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z5.d
460+
; CHECK-NEWLOWERING-NEXT: movprfx z2, z7
461+
; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z31.d, z9.d
462462
; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
463-
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
464-
; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
463+
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z3.d
464+
; CHECK-NEWLOWERING-NEXT: movprfx z3, z6
465465
; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
466466
; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
467467
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
@@ -556,45 +556,45 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
556556
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
557557
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
558558
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
559-
; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
559+
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.h, z2.b
560+
; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b
560561
; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z3.b
561-
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
562562
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
563563
; CHECK-NEWLOWERING-NEXT: ptrue p0.d
564564
; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
565+
; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z2.h
566+
; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z5.h
567+
; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
565568
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
566-
; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z5.h
567-
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
568-
; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
569569
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
570-
; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
571570
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
572-
; CHECK-NEWLOWERING-NEXT: sunpkhi z26.d, z6.s
573-
; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z6.s
574-
; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z4.s
575-
; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z7.s
576-
; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z5.s
571+
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
572+
; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z6.s
573+
; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z7.s
574+
; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
575+
; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z24.s
576+
; CHECK-NEWLOWERING-NEXT: uunpkhi z24.d, z24.s
577+
; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z6.s
578+
; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z25.s
579+
; CHECK-NEWLOWERING-NEXT: uunpkhi z25.d, z25.s
580+
; CHECK-NEWLOWERING-NEXT: sunpklo z30.d, z4.s
577581
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
578-
; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
582+
; CHECK-NEWLOWERING-NEXT: sunpklo z31.d, z2.s
583+
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
584+
; CHECK-NEWLOWERING-NEXT: uunpklo z8.d, z3.s
585+
; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z5.s
579586
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
580-
; CHECK-NEWLOWERING-NEXT: sunpkhi z30.d, z24.s
581-
; CHECK-NEWLOWERING-NEXT: sunpkhi z31.d, z2.s
582-
; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z24.s
583-
; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z2.s
584-
; CHECK-NEWLOWERING-NEXT: uunpkhi z8.d, z25.s
585-
; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z25.s
586-
; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z3.s
587-
; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
588-
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
587+
; CHECK-NEWLOWERING-NEXT: mul z7.d, z7.d, z24.d
588+
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z28.d
589589
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
590-
; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
591-
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
592-
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
593-
; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
594-
; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
590+
; CHECK-NEWLOWERING-NEXT: mul z6.d, z6.d, z25.d
591+
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z29.d
592+
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z5.d
593+
; CHECK-NEWLOWERING-NEXT: movprfx z2, z7
594+
; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z31.d, z9.d
595595
; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
596-
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
597-
; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
596+
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z3.d
597+
; CHECK-NEWLOWERING-NEXT: movprfx z3, z6
598598
; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
599599
; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
600600
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d

llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll

Lines changed: 99 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -172,15 +172,35 @@ entry:
172172
}
173173

174174
define <vscale x 2 x i32> @signed_wide_add_nxv4i16(<vscale x 2 x i32> %acc, <vscale x 4 x i16> %input){
175-
; CHECK-LABEL: signed_wide_add_nxv4i16:
176-
; CHECK: // %bb.0: // %entry
177-
; CHECK-NEXT: ptrue p0.s
178-
; CHECK-NEXT: sxth z1.s, p0/m, z1.s
179-
; CHECK-NEXT: uunpklo z2.d, z1.s
180-
; CHECK-NEXT: uunpkhi z1.d, z1.s
181-
; CHECK-NEXT: add z0.d, z0.d, z2.d
182-
; CHECK-NEXT: add z0.d, z1.d, z0.d
183-
; CHECK-NEXT: ret
175+
; CHECK-SVE2-LABEL: signed_wide_add_nxv4i16:
176+
; CHECK-SVE2: // %bb.0: // %entry
177+
; CHECK-SVE2-NEXT: ptrue p0.s
178+
; CHECK-SVE2-NEXT: sxth z1.s, p0/m, z1.s
179+
; CHECK-SVE2-NEXT: uunpklo z2.d, z1.s
180+
; CHECK-SVE2-NEXT: uunpkhi z1.d, z1.s
181+
; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
182+
; CHECK-SVE2-NEXT: add z0.d, z1.d, z0.d
183+
; CHECK-SVE2-NEXT: ret
184+
;
185+
; CHECK-SVE-LABEL: signed_wide_add_nxv4i16:
186+
; CHECK-SVE: // %bb.0: // %entry
187+
; CHECK-SVE-NEXT: ptrue p0.s
188+
; CHECK-SVE-NEXT: sxth z1.s, p0/m, z1.s
189+
; CHECK-SVE-NEXT: uunpklo z2.d, z1.s
190+
; CHECK-SVE-NEXT: uunpkhi z1.d, z1.s
191+
; CHECK-SVE-NEXT: add z0.d, z0.d, z2.d
192+
; CHECK-SVE-NEXT: add z0.d, z1.d, z0.d
193+
; CHECK-SVE-NEXT: ret
194+
;
195+
; CHECK-NEWLOWERING-LABEL: signed_wide_add_nxv4i16:
196+
; CHECK-NEWLOWERING: // %bb.0: // %entry
197+
; CHECK-NEWLOWERING-NEXT: ptrue p0.s
198+
; CHECK-NEWLOWERING-NEXT: sxth z1.s, p0/m, z1.s
199+
; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z1.s
200+
; CHECK-NEWLOWERING-NEXT: sunpkhi z1.d, z1.s
201+
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
202+
; CHECK-NEWLOWERING-NEXT: add z0.d, z1.d, z0.d
203+
; CHECK-NEWLOWERING-NEXT: ret
184204
entry:
185205
%input.wide = sext <vscale x 4 x i16> %input to <vscale x 4 x i32>
186206
%partial.reduce = tail call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv4i32(<vscale x 2 x i32> %acc, <vscale x 4 x i32> %input.wide)
@@ -203,35 +223,83 @@ entry:
203223
}
204224

205225
define <vscale x 4 x i64> @signed_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vscale x 8 x i32> %input){
206-
; CHECK-LABEL: signed_wide_add_nxv8i32:
207-
; CHECK: // %bb.0: // %entry
208-
; CHECK-NEXT: sunpkhi z4.d, z2.s
209-
; CHECK-NEXT: sunpklo z2.d, z2.s
210-
; CHECK-NEXT: sunpkhi z5.d, z3.s
211-
; CHECK-NEXT: sunpklo z3.d, z3.s
212-
; CHECK-NEXT: add z0.d, z0.d, z2.d
213-
; CHECK-NEXT: add z1.d, z1.d, z4.d
214-
; CHECK-NEXT: add z0.d, z3.d, z0.d
215-
; CHECK-NEXT: add z1.d, z5.d, z1.d
216-
; CHECK-NEXT: ret
226+
; CHECK-SVE2-LABEL: signed_wide_add_nxv8i32:
227+
; CHECK-SVE2: // %bb.0: // %entry
228+
; CHECK-SVE2-NEXT: sunpkhi z4.d, z2.s
229+
; CHECK-SVE2-NEXT: sunpklo z2.d, z2.s
230+
; CHECK-SVE2-NEXT: sunpkhi z5.d, z3.s
231+
; CHECK-SVE2-NEXT: sunpklo z3.d, z3.s
232+
; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
233+
; CHECK-SVE2-NEXT: add z1.d, z1.d, z4.d
234+
; CHECK-SVE2-NEXT: add z0.d, z3.d, z0.d
235+
; CHECK-SVE2-NEXT: add z1.d, z5.d, z1.d
236+
; CHECK-SVE2-NEXT: ret
237+
;
238+
; CHECK-SVE-LABEL: signed_wide_add_nxv8i32:
239+
; CHECK-SVE: // %bb.0: // %entry
240+
; CHECK-SVE-NEXT: sunpkhi z4.d, z2.s
241+
; CHECK-SVE-NEXT: sunpklo z2.d, z2.s
242+
; CHECK-SVE-NEXT: sunpkhi z5.d, z3.s
243+
; CHECK-SVE-NEXT: sunpklo z3.d, z3.s
244+
; CHECK-SVE-NEXT: add z0.d, z0.d, z2.d
245+
; CHECK-SVE-NEXT: add z1.d, z1.d, z4.d
246+
; CHECK-SVE-NEXT: add z0.d, z3.d, z0.d
247+
; CHECK-SVE-NEXT: add z1.d, z5.d, z1.d
248+
; CHECK-SVE-NEXT: ret
249+
;
250+
; CHECK-NEWLOWERING-LABEL: signed_wide_add_nxv8i32:
251+
; CHECK-NEWLOWERING: // %bb.0: // %entry
252+
; CHECK-NEWLOWERING-NEXT: sunpklo z4.d, z2.s
253+
; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z3.s
254+
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
255+
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
256+
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
257+
; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z5.d
258+
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
259+
; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
260+
; CHECK-NEWLOWERING-NEXT: ret
217261
entry:
218262
%input.wide = sext <vscale x 8 x i32> %input to <vscale x 8 x i64>
219263
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64(<vscale x 4 x i64> %acc, <vscale x 8 x i64> %input.wide)
220264
ret <vscale x 4 x i64> %partial.reduce
221265
}
222266

223267
define <vscale x 4 x i64> @unsigned_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vscale x 8 x i32> %input){
224-
; CHECK-LABEL: unsigned_wide_add_nxv8i32:
225-
; CHECK: // %bb.0: // %entry
226-
; CHECK-NEXT: uunpkhi z4.d, z2.s
227-
; CHECK-NEXT: uunpklo z2.d, z2.s
228-
; CHECK-NEXT: uunpkhi z5.d, z3.s
229-
; CHECK-NEXT: uunpklo z3.d, z3.s
230-
; CHECK-NEXT: add z0.d, z0.d, z2.d
231-
; CHECK-NEXT: add z1.d, z1.d, z4.d
232-
; CHECK-NEXT: add z0.d, z3.d, z0.d
233-
; CHECK-NEXT: add z1.d, z5.d, z1.d
234-
; CHECK-NEXT: ret
268+
; CHECK-SVE2-LABEL: unsigned_wide_add_nxv8i32:
269+
; CHECK-SVE2: // %bb.0: // %entry
270+
; CHECK-SVE2-NEXT: uunpkhi z4.d, z2.s
271+
; CHECK-SVE2-NEXT: uunpklo z2.d, z2.s
272+
; CHECK-SVE2-NEXT: uunpkhi z5.d, z3.s
273+
; CHECK-SVE2-NEXT: uunpklo z3.d, z3.s
274+
; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
275+
; CHECK-SVE2-NEXT: add z1.d, z1.d, z4.d
276+
; CHECK-SVE2-NEXT: add z0.d, z3.d, z0.d
277+
; CHECK-SVE2-NEXT: add z1.d, z5.d, z1.d
278+
; CHECK-SVE2-NEXT: ret
279+
;
280+
; CHECK-SVE-LABEL: unsigned_wide_add_nxv8i32:
281+
; CHECK-SVE: // %bb.0: // %entry
282+
; CHECK-SVE-NEXT: uunpkhi z4.d, z2.s
283+
; CHECK-SVE-NEXT: uunpklo z2.d, z2.s
284+
; CHECK-SVE-NEXT: uunpkhi z5.d, z3.s
285+
; CHECK-SVE-NEXT: uunpklo z3.d, z3.s
286+
; CHECK-SVE-NEXT: add z0.d, z0.d, z2.d
287+
; CHECK-SVE-NEXT: add z1.d, z1.d, z4.d
288+
; CHECK-SVE-NEXT: add z0.d, z3.d, z0.d
289+
; CHECK-SVE-NEXT: add z1.d, z5.d, z1.d
290+
; CHECK-SVE-NEXT: ret
291+
;
292+
; CHECK-NEWLOWERING-LABEL: unsigned_wide_add_nxv8i32:
293+
; CHECK-NEWLOWERING: // %bb.0: // %entry
294+
; CHECK-NEWLOWERING-NEXT: uunpklo z4.d, z2.s
295+
; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z3.s
296+
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
297+
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
298+
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
299+
; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z5.d
300+
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
301+
; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
302+
; CHECK-NEWLOWERING-NEXT: ret
235303
entry:
236304
%input.wide = zext <vscale x 8 x i32> %input to <vscale x 8 x i64>
237305
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64(<vscale x 4 x i64> %acc, <vscale x 8 x i64> %input.wide)

0 commit comments

Comments
 (0)