@@ -554,6 +554,52 @@ define i32 @dot_product_i32(ptr %a, ptr %b) {
554
554
ret i32 %add.1
555
555
}
556
556
557
; Same as above, except the reduction order has been perturbed. This
; is checking for our ability to reorder.
define i32 @dot_product_i32_reorder(ptr %a, ptr %b) {
; CHECK-LABEL: @dot_product_i32_reorder(
; CHECK-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
; CHECK-NEXT:    [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
; CHECK-NEXT:    [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
; CHECK-NEXT:    [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
; CHECK-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
; CHECK-NEXT:    [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
; CHECK-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
; CHECK-NEXT:    [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
; CHECK-NEXT:    [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
; CHECK-NEXT:    [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
; CHECK-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
; CHECK-NEXT:    [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
; CHECK-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
; CHECK-NEXT:    ret i32 [[ADD_1]]
;
  %gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0
  %l.a.0 = load i32, ptr %gep.a.0, align 4
  %gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1
  %l.a.1 = load i32, ptr %gep.a.1, align 4
  %gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2
  %l.a.2 = load i32, ptr %gep.a.2, align 4

  %gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0
  %l.b.0 = load i32, ptr %gep.b.0, align 4
  %gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1
  %l.b.1 = load i32, ptr %gep.b.1, align 4
  %gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2
  %l.b.2 = load i32, ptr %gep.b.2, align 4

  %mul.0 = mul nsw i32 %l.a.0, %l.b.0
  %mul.1 = mul nsw i32 %l.a.1, %l.b.1
  %mul.2 = mul nsw i32 %l.a.2, %l.b.2

  ; Reduction adds are in perturbed order (mul.1 + mul.0 first) relative to
  ; the non-reorder variant above; the CHECK lines show the i32 case is not
  ; vectorized here, only reordered scalar math remains.
  %add.0 = add i32 %mul.1, %mul.0
  %add.1 = add i32 %add.0, %mul.2
  ret i32 %add.1
}
557
603
define float @dot_product_fp32 (ptr %a , ptr %b ) {
558
604
; NON-POW2-LABEL: @dot_product_fp32(
559
605
; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
@@ -604,6 +650,50 @@ define float @dot_product_fp32(ptr %a, ptr %b) {
604
650
ret float %add.1
605
651
}
606
652
; Same as above, except the reduction order has been perturbed. This
; is checking for our ability to reorder.
define float @dot_product_fp32_reorder(ptr %a, ptr %b) {
; CHECK-LABEL: @dot_product_fp32_reorder(
; CHECK-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
; CHECK-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
; CHECK-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
; CHECK-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
; CHECK-NEXT:    [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; CHECK-NEXT:    [[ADD_0:%.*]] = fadd fast float [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
; CHECK-NEXT:    ret float [[ADD_1]]
;
  %gep.a.0 = getelementptr inbounds float, ptr %a, i32 0
  %l.a.0 = load float, ptr %gep.a.0, align 4
  %gep.a.1 = getelementptr inbounds float, ptr %a, i32 1
  %l.a.1 = load float, ptr %gep.a.1, align 4
  %gep.a.2 = getelementptr inbounds float, ptr %a, i32 2
  %l.a.2 = load float, ptr %gep.a.2, align 4

  %gep.b.0 = getelementptr inbounds float, ptr %b, i32 0
  %l.b.0 = load float, ptr %gep.b.0, align 4
  %gep.b.1 = getelementptr inbounds float, ptr %b, i32 1
  %l.b.1 = load float, ptr %gep.b.1, align 4
  %gep.b.2 = getelementptr inbounds float, ptr %b, i32 2
  %l.b.2 = load float, ptr %gep.b.2, align 4

  ; fast-math multiplies allow the SLP vectorizer to form the <2 x float>
  ; product seen in the CHECK lines, despite the perturbed reduction order.
  %mul.0 = fmul fast float %l.a.0, %l.b.0
  %mul.1 = fmul fast float %l.a.1, %l.b.1
  %mul.2 = fmul fast float %l.a.2, %l.b.2

  ; Reduction adds in perturbed order: mul.1 + mul.0 first, then + mul.2.
  %add.0 = fadd fast float %mul.1, %mul.0
  %add.1 = fadd fast float %add.0, %mul.2
  ret float %add.1
}

607
697
define double @dot_product_fp64 (ptr %a , ptr %b ) {
608
698
; NON-POW2-LABEL: @dot_product_fp64(
609
699
; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
0 commit comments