@@ -589,88 +589,100 @@ for.exit: ; preds = %for.body
589
589
ret i32 %add
590
590
}
591
591
592
- define i32 @not_dotp_not_phi (ptr %a , ptr %b ) {
592
+ define i32 @not_dotp_not_phi (ptr %a , ptr noalias %b , ptr noalias %c ) {
593
593
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_phi(
594
- ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
594
+ ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C :%.*]]) #[[ATTR0]] {
595
595
; CHECK-INTERLEAVE1-NEXT: entry:
596
596
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
597
597
; CHECK-INTERLEAVE1: vector.ph:
598
598
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
599
599
; CHECK-INTERLEAVE1: vector.body:
600
600
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
601
- ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8 :%.*]], [[VECTOR_BODY]] ]
601
+ ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7 :%.*]], [[VECTOR_BODY]] ]
602
602
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
603
- ; CHECK-INTERLEAVE1-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
604
- ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2 ]], align 1
605
- ; CHECK-INTERLEAVE1-NEXT: [[TMP3 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
603
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP3 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
604
+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3 ]], align 1
605
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP2 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
606
606
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
607
- ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
608
- ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
609
- ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
610
- ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
611
- ; CHECK-INTERLEAVE1-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
607
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
608
+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
609
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
610
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
611
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
612
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
613
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
614
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
615
+ ; CHECK-INTERLEAVE1-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
612
616
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
613
617
; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
614
618
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
615
619
; CHECK-INTERLEAVE1: middle.block:
616
- ; CHECK-INTERLEAVE1-NEXT: [[TMP10 :%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
617
- ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
620
+ ; CHECK-INTERLEAVE1-NEXT: [[TMP12 :%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
621
+ ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
618
622
; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
619
623
; CHECK-INTERLEAVE1: scalar.ph:
620
624
;
621
625
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_phi(
622
- ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
626
+ ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C :%.*]]) #[[ATTR0]] {
623
627
; CHECK-INTERLEAVED-NEXT: entry:
624
628
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
625
629
; CHECK-INTERLEAVED: vector.ph:
626
630
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
627
631
; CHECK-INTERLEAVED: vector.body:
628
632
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
629
- ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8 :%.*]], [[VECTOR_BODY]] ]
633
+ ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7 :%.*]], [[VECTOR_BODY]] ]
630
634
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
631
- ; CHECK-INTERLEAVED-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
632
- ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2 ]], align 1
633
- ; CHECK-INTERLEAVED-NEXT: [[TMP3 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
635
+ ; CHECK-INTERLEAVED-NEXT: [[TMP3 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
636
+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3 ]], align 1
637
+ ; CHECK-INTERLEAVED-NEXT: [[TMP2 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
634
638
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
635
- ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
636
- ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
637
- ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
638
- ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
639
- ; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
639
+ ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
640
+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
641
+ ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
642
+ ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
643
+ ; CHECK-INTERLEAVED-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
644
+ ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
645
+ ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
646
+ ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
647
+ ; CHECK-INTERLEAVED-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
640
648
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
641
649
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
642
650
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
643
651
; CHECK-INTERLEAVED: middle.block:
644
- ; CHECK-INTERLEAVED-NEXT: [[TMP10 :%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
645
- ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
652
+ ; CHECK-INTERLEAVED-NEXT: [[TMP12 :%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
653
+ ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
646
654
; CHECK-INTERLEAVED-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
647
655
; CHECK-INTERLEAVED: scalar.ph:
648
656
;
649
657
; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_phi(
650
- ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
658
+ ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C :%.*]]) #[[ATTR0]] {
651
659
; CHECK-MAXBW-NEXT: entry:
652
660
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
653
661
; CHECK-MAXBW: vector.ph:
654
662
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
655
663
; CHECK-MAXBW: vector.body:
656
664
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
657
- ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8 :%.*]], [[VECTOR_BODY]] ]
665
+ ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7 :%.*]], [[VECTOR_BODY]] ]
658
666
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
659
- ; CHECK-MAXBW-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
660
- ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2 ]], align 1
661
- ; CHECK-MAXBW-NEXT: [[TMP3 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
667
+ ; CHECK-MAXBW-NEXT: [[TMP3 :%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
668
+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3 ]], align 1
669
+ ; CHECK-MAXBW-NEXT: [[TMP2 :%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
662
670
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
663
- ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
664
- ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
665
- ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
666
- ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
667
- ; CHECK-MAXBW-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
671
+ ; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
672
+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
673
+ ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
674
+ ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul <16 x i32> [[TMP5]], [[TMP2]]
675
+ ; CHECK-MAXBW-NEXT: [[TMP7]] = add <16 x i32> [[TMP6]], [[TMP5]]
676
+ ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
677
+ ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
678
+ ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
679
+ ; CHECK-MAXBW-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP10]], align 4
668
680
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
669
681
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
670
682
; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
671
683
; CHECK-MAXBW: middle.block:
672
- ; CHECK-MAXBW-NEXT: [[TMP10 :%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
673
- ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8 ]], i32 15
684
+ ; CHECK-MAXBW-NEXT: [[TMP12 :%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
685
+ ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7 ]], i32 15
674
686
; CHECK-MAXBW-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
675
687
; CHECK-MAXBW: scalar.ph:
676
688
;
@@ -688,6 +700,8 @@ for.body: ; preds = %for.body, %entry
688
700
%ext.b = zext i8 %load.b to i32
689
701
%mul = mul i32 %ext.b , %ext.a
690
702
%add = add i32 %mul , %ext.b
703
+ %gep.c = getelementptr i32 , ptr %c , i64 %iv
704
+ store i32 %accum , ptr %gep.c
691
705
%iv.next = add i64 %iv , 1
692
706
%exitcond.not = icmp eq i64 %iv.next , 1024
693
707
br i1 %exitcond.not , label %for.exit , label %for.body
@@ -946,6 +960,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) {
946
960
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]]
947
961
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
948
962
; CHECK-MAXBW: scalar.ph:
963
+ ;
949
964
entry:
950
965
br label %for.body
951
966
0 commit comments