You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll
+130Lines changed: 130 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -497,3 +497,133 @@ entry:
497
497
%partial.reduce = tailcall <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %mult)
498
498
ret <vscale x 2 x i64> %partial.reduce
499
499
}
500
+
501
+
; Partial-reduce dot product with mixed operand widths: i16 x i8 -> i64
; accumulator. Both inputs are zero-extended, so the i8 operand is widened
; with a mask (and #0xff) and everything is unpacked to .d for mul/mla.
define <vscale x 2 x i64> @udot_different_types(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i8> %b){
; CHECK-LABEL: udot_different_types:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z2.h, z2.h, #0xff
; CHECK-NEXT:    uunpklo z3.s, z1.h
; CHECK-NEXT:    uunpkhi z1.s, z1.h
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z4.s, z2.h
; CHECK-NEXT:    uunpkhi z2.s, z2.h
; CHECK-NEXT:    uunpklo z5.d, z3.s
; CHECK-NEXT:    uunpkhi z3.d, z3.s
; CHECK-NEXT:    uunpklo z7.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpklo z6.d, z4.s
; CHECK-NEXT:    uunpkhi z4.d, z4.s
; CHECK-NEXT:    uunpklo z24.d, z2.s
; CHECK-NEXT:    uunpkhi z2.d, z2.s
; CHECK-NEXT:    mul z3.d, z3.d, z4.d
; CHECK-NEXT:    mla z0.d, p0/m, z5.d, z6.d
; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    mla z1.d, p0/m, z7.d, z24.d
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
entry:
  %a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
  %b.wide = zext <vscale x 8 x i8> %b to <vscale x 8 x i64>
  %mult = mul nuw nsw <vscale x 8 x i64> %a.wide, %b.wide
  %partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %mult)
  ret <vscale x 2 x i64> %partial.reduce
}
532
+
533
+
; Same shape as udot_different_types but both operands are sign-extended:
; the i8 operand is widened in-register with sxtb before the .s/.d unpacks.
define <vscale x 2 x i64> @sdot_different_types(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i8> %b){
; CHECK-LABEL: sdot_different_types:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sunpklo z3.s, z1.h
; CHECK-NEXT:    sunpkhi z1.s, z1.h
; CHECK-NEXT:    sxtb z2.h, p0/m, z2.h
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sunpklo z5.d, z3.s
; CHECK-NEXT:    sunpkhi z3.d, z3.s
; CHECK-NEXT:    sunpklo z7.d, z1.s
; CHECK-NEXT:    sunpklo z4.s, z2.h
; CHECK-NEXT:    sunpkhi z2.s, z2.h
; CHECK-NEXT:    sunpkhi z1.d, z1.s
; CHECK-NEXT:    sunpklo z6.d, z4.s
; CHECK-NEXT:    sunpkhi z4.d, z4.s
; CHECK-NEXT:    sunpklo z24.d, z2.s
; CHECK-NEXT:    sunpkhi z2.d, z2.s
; CHECK-NEXT:    mul z3.d, z3.d, z4.d
; CHECK-NEXT:    mla z0.d, p0/m, z5.d, z6.d
; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    mla z1.d, p0/m, z7.d, z24.d
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
entry:
  %a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
  %b.wide = sext <vscale x 8 x i8> %b to <vscale x 8 x i64>
  %mult = mul nuw nsw <vscale x 8 x i64> %a.wide, %b.wide
  %partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %mult)
  ret <vscale x 2 x i64> %partial.reduce
}
565
+
566
+
; Mixed-signedness variant: zero-extended i16 operand, sign-extended i8
; operand (sxtb + sunpk* on z2, uunpk* on z1) before the widened mul/mla.
define <vscale x 2 x i64> @usdot_different_types(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i8> %b){
; CHECK-LABEL: usdot_different_types:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uunpklo z3.s, z1.h
; CHECK-NEXT:    uunpkhi z1.s, z1.h
; CHECK-NEXT:    sxtb z2.h, p0/m, z2.h
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z5.d, z3.s
; CHECK-NEXT:    uunpkhi z3.d, z3.s
; CHECK-NEXT:    uunpklo z7.d, z1.s
; CHECK-NEXT:    sunpklo z4.s, z2.h
; CHECK-NEXT:    sunpkhi z2.s, z2.h
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    sunpklo z6.d, z4.s
; CHECK-NEXT:    sunpkhi z4.d, z4.s
; CHECK-NEXT:    sunpklo z24.d, z2.s
; CHECK-NEXT:    sunpkhi z2.d, z2.s
; CHECK-NEXT:    mul z3.d, z3.d, z4.d
; CHECK-NEXT:    mla z0.d, p0/m, z5.d, z6.d
; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    mla z1.d, p0/m, z7.d, z24.d
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
entry:
  %a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
  %b.wide = sext <vscale x 8 x i8> %b to <vscale x 8 x i64>
  %mult = mul nuw nsw <vscale x 8 x i64> %a.wide, %b.wide
  %partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %mult)
  ret <vscale x 2 x i64> %partial.reduce
}
598
+
599
+
define <vscale x 2 x i64> @sudot_different_types(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i8> %b){
600
+
; CHECK-LABEL: sudot_different_types:
601
+
; CHECK: // %bb.0: // %entry
602
+
; CHECK-NEXT: and z2.h, z2.h, #0xff
603
+
; CHECK-NEXT: sunpklo z3.s, z1.h
604
+
; CHECK-NEXT: sunpkhi z1.s, z1.h
605
+
; CHECK-NEXT: ptrue p0.d
606
+
; CHECK-NEXT: uunpklo z4.s, z2.h
607
+
; CHECK-NEXT: uunpkhi z2.s, z2.h
608
+
; CHECK-NEXT: sunpklo z5.d, z3.s
609
+
; CHECK-NEXT: sunpkhi z3.d, z3.s
610
+
; CHECK-NEXT: sunpklo z7.d, z1.s
611
+
; CHECK-NEXT: sunpkhi z1.d, z1.s
612
+
; CHECK-NEXT: uunpklo z6.d, z4.s
613
+
; CHECK-NEXT: uunpkhi z4.d, z4.s
614
+
; CHECK-NEXT: uunpklo z24.d, z2.s
615
+
; CHECK-NEXT: uunpkhi z2.d, z2.s
616
+
; CHECK-NEXT: mul z3.d, z3.d, z4.d
617
+
; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
618
+
; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
619
+
; CHECK-NEXT: movprfx z1, z3
620
+
; CHECK-NEXT: mla z1.d, p0/m, z7.d, z24.d
621
+
; CHECK-NEXT: add z0.d, z1.d, z0.d
622
+
; CHECK-NEXT: ret
623
+
entry:
624
+
%a.wide = sext <vscale x 8 x i16> %ato <vscale x 8 x i64>
625
+
%b.wide = zext <vscale x 8 x i8> %bto <vscale x 8 x i64>
626
+
%mult = mulnuwnsw <vscale x 8 x i64> %a.wide, %b.wide
627
+
%partial.reduce = tailcall <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %mult)
0 commit comments