[DAGCombiner] Add DAG combine for PARTIAL_REDUCE_MLA when no mul op
Generic DAG combine for ISD::PARTIAL_REDUCE_U/SMLA to convert:
PARTIAL_REDUCE_*MLA(Acc, ZEXT(UnextOp1), Splat(1)) into
PARTIAL_REDUCE_UMLA(Acc, UnextOp1, TRUNC(Splat(1)))
and
PARTIAL_REDUCE_*MLA(Acc, SEXT(UnextOp1), Splat(1)) into
PARTIAL_REDUCE_SMLA(Acc, UnextOp1, TRUNC(Splat(1))).
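For illustration, here is a minimal DAGCombiner-style sketch of the fold described above. This is a hedged sketch, not the upstream implementation: the helper name foldPartialReduceMLAOfExtend is hypothetical, and the snippet assumes the usual SelectionDAG headers plus the ISD::PARTIAL_REDUCE_UMLA/SMLA opcodes named in the commit message.

// Hypothetical sketch of the combine: if the multiplier operand of a
// PARTIAL_REDUCE_*MLA is a splat of 1 and the other operand is an extend,
// fold the extend into the opcode and rebuild the splat at the narrower type.
static SDValue foldPartialReduceMLAOfExtend(SDNode *N, SelectionDAG &DAG) {
  SDValue Acc = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Op2 = N->getOperand(2);

  // Only handle a constant splat of 1 as the multiplier.
  APInt SplatVal;
  if (!ISD::isConstantSplatVector(Op2.getNode(), SplatVal) || !SplatVal.isOne())
    return SDValue();

  unsigned ExtOpc = Op1.getOpcode();
  if (ExtOpc != ISD::ZERO_EXTEND && ExtOpc != ISD::SIGN_EXTEND)
    return SDValue();

  SDValue Unext = Op1.getOperand(0);
  SDLoc DL(N);
  unsigned NewOpc = ExtOpc == ISD::ZERO_EXTEND ? ISD::PARTIAL_REDUCE_UMLA
                                               : ISD::PARTIAL_REDUCE_SMLA;
  // TRUNC(Splat(1)) is simply a splat of 1 at the unextended element type.
  SDValue NarrowOne = DAG.getConstant(1, DL, Unext.getValueType());
  return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, Unext, NarrowOne);
}

With the multiplier kept at the narrow element type, AArch64 can select a single udot/sdot against a splat-of-one vector instead of the unpack-and-add chains removed in the test diff below.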
llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll: 20 additions & 82 deletions
@@ -620,16 +620,8 @@ define <vscale x 4 x i32> @udot_no_bin_op(<vscale x 4 x i32> %acc, <vscale x 16
 ;
 ; CHECK-NEWLOWERING-LABEL: udot_no_bin_op:
 ; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z1.b
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
-; CHECK-NEWLOWERING-NEXT: add z1.s, z2.s, z1.s
-; CHECK-NEWLOWERING-NEXT: add z0.s, z4.s, z0.s
-; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
+; CHECK-NEWLOWERING-NEXT: mov z2.b, #1 // =0x1
+; CHECK-NEWLOWERING-NEXT: udot z0.s, z1.b, z2.b
 ; CHECK-NEWLOWERING-NEXT: ret
 %a.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
 %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %a.ext)
@@ -645,16 +637,8 @@ define <vscale x 4 x i32> @sdot_no_bin_op(<vscale x 4 x i32> %acc, <vscale x 16
 ;
 ; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op:
 ; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z1.b
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
-; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
-; CHECK-NEWLOWERING-NEXT: add z1.s, z2.s, z1.s
-; CHECK-NEWLOWERING-NEXT: add z0.s, z4.s, z0.s
-; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
+; CHECK-NEWLOWERING-NEXT: mov z2.b, #1 // =0x1
+; CHECK-NEWLOWERING-NEXT: sdot z0.s, z1.b, z2.b
 ; CHECK-NEWLOWERING-NEXT: ret
 %a.ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
 %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %a.ext)
@@ -670,16 +654,8 @@ define <vscale x 2 x i64> @udot_no_bin_op_wide(<vscale x 2 x i64> %acc, <vscale
 ;
 ; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_wide:
 ; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z3.d, z2.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z1.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
-; CHECK-NEWLOWERING-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEWLOWERING-NEXT: mov z2.h, #1 // =0x1
+; CHECK-NEWLOWERING-NEXT: udot z0.d, z1.h, z2.h
 ; CHECK-NEWLOWERING-NEXT: ret
 entry:
 %a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
@@ -696,16 +672,8 @@ define <vscale x 2 x i64> @sdot_no_bin_op_wide(<vscale x 2 x i64> %acc, <vscale
 ;
 ; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_wide:
 ; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z3.d, z2.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z1.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
-; CHECK-NEWLOWERING-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEWLOWERING-NEXT: mov z2.h, #1 // =0x1
+; CHECK-NEWLOWERING-NEXT: sdot z0.d, z1.h, z2.h
 ; CHECK-NEWLOWERING-NEXT: ret
 entry:
 %a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
@@ -727,28 +695,13 @@ define <vscale x 4 x i64> @udot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
 ;
 ; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_8to64:
 ; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z2.b
+; CHECK-NEWLOWERING-NEXT: mov z3.b, #1 // =0x1
+; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z2.b
 ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
-; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z3.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z4.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z4.d, z4.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z5.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z2.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z3.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z5.s
-; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
-; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z6.d
-; CHECK-NEWLOWERING-NEXT: add z4.d, z25.d, z24.d
-; CHECK-NEWLOWERING-NEXT: add z2.d, z3.d, z2.d
-; CHECK-NEWLOWERING-NEXT: add z0.d, z5.d, z0.d
-; CHECK-NEWLOWERING-NEXT: add z1.d, z7.d, z1.d
-; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
-; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
+; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z3.b
+; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
+; CHECK-NEWLOWERING-NEXT: udot z0.d, z5.h, z4.h
+; CHECK-NEWLOWERING-NEXT: udot z1.d, z2.h, z3.h
 ; CHECK-NEWLOWERING-NEXT: ret
 %a.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
 %partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64(<vscale x 4 x i64> %acc, <vscale x 16 x i64> %a.ext)
@@ -769,28 +722,13 @@ define <vscale x 4 x i64> @sdot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
 ;
 ; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_8to64:
 ; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z2.b
+; CHECK-NEWLOWERING-NEXT: mov z3.b, #1 // =0x1
+; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z2.b
 ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
-; CHECK-NEWLOWERING-NEXT: sunpklo z4.s, z3.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z4.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z4.d, z4.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z5.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z2.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z3.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z5.s
-; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
-; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z6.d
-; CHECK-NEWLOWERING-NEXT: add z4.d, z25.d, z24.d
-; CHECK-NEWLOWERING-NEXT: add z2.d, z3.d, z2.d
-; CHECK-NEWLOWERING-NEXT: add z0.d, z5.d, z0.d
-; CHECK-NEWLOWERING-NEXT: add z1.d, z7.d, z1.d
-; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
-; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
+; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z3.b
+; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
+; CHECK-NEWLOWERING-NEXT: sdot z0.d, z5.h, z4.h
+; CHECK-NEWLOWERING-NEXT: sdot z1.d, z2.h, z3.h
 ; CHECK-NEWLOWERING-NEXT: ret
 %a.ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
 %partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64(<vscale x 4 x i64> %acc, <vscale x 16 x i64> %a.ext)