@@ -478,233 +478,75 @@ define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) {
478
478
; CHECK-LABEL: define i32 @tc4(
479
479
; CHECK-SAME: ptr noundef readonly captures(none) [[TMP:%.*]]) #[[ATTR1]] {
480
480
; CHECK-NEXT: [[ENTRY:.*:]]
481
- ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 16
482
- ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 32
483
- ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 48
484
- ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 64
485
- ; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 80
486
- ; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 96
487
- ; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 112
488
- ; CHECK-NEXT: [[TMP0:%.*]] = add i64 0, 0
489
- ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[TMP0]]
490
- ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 0
481
+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = add i64 0, 0
482
+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[INDVARS_IV]]
483
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX1]], i32 0
491
484
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
492
- ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX2]], i64 0, i64 [[TMP0]]
493
- ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP3]], i32 0
494
- ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
495
- ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
496
- ; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD1]]
497
- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[TMP0]]
498
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 0
499
- ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4
500
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[TMP0]]
501
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP9]], i32 0
502
- ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
503
- ; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD2]]
504
- ; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD3]]
505
- ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP11]], [[TMP5]]
506
- ; CHECK-NEXT: [[TMP14:%.*]] = sub <4 x i32> [[TMP5]], [[TMP11]]
507
- ; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
508
- ; CHECK-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP6]], [[TMP12]]
509
- ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX30]], i64 0, i64 [[TMP0]]
510
- ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0
511
- ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP18]], align 4
512
- ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX33]], i64 0, i64 [[TMP0]]
513
- ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP19]], i32 0
514
- ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP20]], align 4
515
- ; CHECK-NEXT: [[TMP21:%.*]] = add <4 x i32> [[WIDE_LOAD5]], [[WIDE_LOAD4]]
516
- ; CHECK-NEXT: [[TMP22:%.*]] = sub <4 x i32> [[WIDE_LOAD4]], [[WIDE_LOAD5]]
517
- ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX46]], i64 0, i64 [[TMP0]]
518
- ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP23]], i32 0
519
- ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP24]], align 4
520
- ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX49]], i64 0, i64 [[TMP0]]
521
- ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP25]], i32 0
522
- ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4
523
- ; CHECK-NEXT: [[TMP27:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD6]]
524
- ; CHECK-NEXT: [[TMP28:%.*]] = sub <4 x i32> [[WIDE_LOAD6]], [[WIDE_LOAD7]]
525
- ; CHECK-NEXT: [[TMP29:%.*]] = add <4 x i32> [[TMP27]], [[TMP21]]
526
- ; CHECK-NEXT: [[TMP30:%.*]] = sub <4 x i32> [[TMP21]], [[TMP27]]
527
- ; CHECK-NEXT: [[TMP31:%.*]] = add <4 x i32> [[TMP28]], [[TMP22]]
528
- ; CHECK-NEXT: [[TMP32:%.*]] = sub <4 x i32> [[TMP22]], [[TMP28]]
529
- ; CHECK-NEXT: [[TMP33:%.*]] = add <4 x i32> [[TMP29]], [[TMP13]]
530
- ; CHECK-NEXT: [[TMP34:%.*]] = lshr <4 x i32> [[TMP33]], splat (i32 15)
531
- ; CHECK-NEXT: [[TMP35:%.*]] = and <4 x i32> [[TMP34]], splat (i32 65537)
532
- ; CHECK-NEXT: [[TMP36:%.*]] = mul nuw <4 x i32> [[TMP35]], splat (i32 65535)
533
- ; CHECK-NEXT: [[TMP37:%.*]] = add <4 x i32> [[TMP36]], [[TMP33]]
534
- ; CHECK-NEXT: [[TMP38:%.*]] = xor <4 x i32> [[TMP37]], [[TMP36]]
535
- ; CHECK-NEXT: [[TMP39:%.*]] = sub <4 x i32> [[TMP13]], [[TMP29]]
536
- ; CHECK-NEXT: [[TMP40:%.*]] = lshr <4 x i32> [[TMP39]], splat (i32 15)
537
- ; CHECK-NEXT: [[TMP41:%.*]] = and <4 x i32> [[TMP40]], splat (i32 65537)
538
- ; CHECK-NEXT: [[TMP42:%.*]] = mul nuw <4 x i32> [[TMP41]], splat (i32 65535)
539
- ; CHECK-NEXT: [[TMP43:%.*]] = add <4 x i32> [[TMP42]], [[TMP39]]
540
- ; CHECK-NEXT: [[TMP44:%.*]] = xor <4 x i32> [[TMP43]], [[TMP42]]
541
- ; CHECK-NEXT: [[TMP45:%.*]] = add <4 x i32> [[TMP31]], [[TMP15]]
542
- ; CHECK-NEXT: [[TMP46:%.*]] = lshr <4 x i32> [[TMP45]], splat (i32 15)
543
- ; CHECK-NEXT: [[TMP47:%.*]] = and <4 x i32> [[TMP46]], splat (i32 65537)
544
- ; CHECK-NEXT: [[TMP48:%.*]] = mul nuw <4 x i32> [[TMP47]], splat (i32 65535)
545
- ; CHECK-NEXT: [[TMP49:%.*]] = add <4 x i32> [[TMP48]], [[TMP45]]
546
- ; CHECK-NEXT: [[TMP50:%.*]] = xor <4 x i32> [[TMP49]], [[TMP48]]
547
- ; CHECK-NEXT: [[TMP51:%.*]] = sub <4 x i32> [[TMP15]], [[TMP31]]
548
- ; CHECK-NEXT: [[TMP52:%.*]] = lshr <4 x i32> [[TMP51]], splat (i32 15)
549
- ; CHECK-NEXT: [[TMP53:%.*]] = and <4 x i32> [[TMP52]], splat (i32 65537)
550
- ; CHECK-NEXT: [[TMP54:%.*]] = mul nuw <4 x i32> [[TMP53]], splat (i32 65535)
551
- ; CHECK-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP51]]
552
- ; CHECK-NEXT: [[TMP56:%.*]] = xor <4 x i32> [[TMP55]], [[TMP54]]
553
- ; CHECK-NEXT: [[TMP57:%.*]] = add <4 x i32> [[TMP30]], [[TMP14]]
554
- ; CHECK-NEXT: [[TMP58:%.*]] = lshr <4 x i32> [[TMP57]], splat (i32 15)
555
- ; CHECK-NEXT: [[TMP59:%.*]] = and <4 x i32> [[TMP58]], splat (i32 65537)
556
- ; CHECK-NEXT: [[TMP60:%.*]] = mul nuw <4 x i32> [[TMP59]], splat (i32 65535)
557
- ; CHECK-NEXT: [[TMP61:%.*]] = add <4 x i32> [[TMP60]], [[TMP57]]
558
- ; CHECK-NEXT: [[TMP62:%.*]] = xor <4 x i32> [[TMP61]], [[TMP60]]
559
- ; CHECK-NEXT: [[TMP63:%.*]] = sub <4 x i32> [[TMP14]], [[TMP30]]
560
- ; CHECK-NEXT: [[TMP64:%.*]] = lshr <4 x i32> [[TMP63]], splat (i32 15)
561
- ; CHECK-NEXT: [[TMP65:%.*]] = and <4 x i32> [[TMP64]], splat (i32 65537)
562
- ; CHECK-NEXT: [[TMP66:%.*]] = mul nuw <4 x i32> [[TMP65]], splat (i32 65535)
563
- ; CHECK-NEXT: [[TMP67:%.*]] = add <4 x i32> [[TMP66]], [[TMP63]]
564
- ; CHECK-NEXT: [[TMP68:%.*]] = xor <4 x i32> [[TMP67]], [[TMP66]]
565
- ; CHECK-NEXT: [[TMP69:%.*]] = add <4 x i32> [[TMP32]], [[TMP16]]
566
- ; CHECK-NEXT: [[TMP70:%.*]] = lshr <4 x i32> [[TMP69]], splat (i32 15)
567
- ; CHECK-NEXT: [[TMP71:%.*]] = and <4 x i32> [[TMP70]], splat (i32 65537)
568
- ; CHECK-NEXT: [[TMP72:%.*]] = mul nuw <4 x i32> [[TMP71]], splat (i32 65535)
569
- ; CHECK-NEXT: [[TMP73:%.*]] = add <4 x i32> [[TMP72]], [[TMP69]]
570
- ; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i32> [[TMP73]], [[TMP72]]
571
- ; CHECK-NEXT: [[TMP75:%.*]] = sub <4 x i32> [[TMP16]], [[TMP32]]
572
- ; CHECK-NEXT: [[TMP76:%.*]] = lshr <4 x i32> [[TMP75]], splat (i32 15)
573
- ; CHECK-NEXT: [[TMP77:%.*]] = and <4 x i32> [[TMP76]], splat (i32 65537)
574
- ; CHECK-NEXT: [[TMP78:%.*]] = mul nuw <4 x i32> [[TMP77]], splat (i32 65535)
575
- ; CHECK-NEXT: [[TMP79:%.*]] = add <4 x i32> [[TMP78]], [[TMP75]]
576
- ; CHECK-NEXT: [[TMP80:%.*]] = xor <4 x i32> [[TMP79]], [[TMP78]]
577
- ; CHECK-NEXT: [[TMP81:%.*]] = add <4 x i32> [[TMP74]], [[TMP80]]
578
- ; CHECK-NEXT: [[TMP82:%.*]] = add <4 x i32> [[TMP81]], [[TMP68]]
579
- ; CHECK-NEXT: [[TMP83:%.*]] = add <4 x i32> [[TMP82]], [[TMP62]]
580
- ; CHECK-NEXT: [[TMP84:%.*]] = add <4 x i32> [[TMP83]], [[TMP44]]
581
- ; CHECK-NEXT: [[TMP85:%.*]] = add <4 x i32> [[TMP84]], [[TMP38]]
582
- ; CHECK-NEXT: [[TMP86:%.*]] = add <4 x i32> [[TMP85]], [[TMP56]]
583
- ; CHECK-NEXT: [[TMP87:%.*]] = add <4 x i32> [[TMP86]], [[TMP50]]
584
- ; CHECK-NEXT: [[TMP88:%.*]] = and <4 x i32> [[TMP87]], splat (i32 65535)
585
- ; CHECK-NEXT: [[TMP89:%.*]] = lshr <4 x i32> [[TMP87]], splat (i32 16)
586
- ; CHECK-NEXT: [[TMP90:%.*]] = add <4 x i32> [[TMP89]], zeroinitializer
587
- ; CHECK-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP90]], [[TMP88]]
485
+ ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[WIDE_LOAD]]
588
486
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw i64 0, 4
589
- ; CHECK-NEXT: [[TMP92 :%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP91 ]])
590
- ; CHECK-NEXT: ret i32 [[TMP92 ]]
487
+ ; CHECK-NEXT: [[TMP4 :%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3 ]])
488
+ ; CHECK-NEXT: ret i32 [[TMP4 ]]
591
489
;
592
490
entry:
593
- %arrayidx2 = getelementptr inbounds nuw i8 , ptr %tmp , i64 16
594
- %arrayidx11 = getelementptr inbounds nuw i8 , ptr %tmp , i64 32
595
- %arrayidx14 = getelementptr inbounds nuw i8 , ptr %tmp , i64 48
596
- %arrayidx30 = getelementptr inbounds nuw i8 , ptr %tmp , i64 64
597
- %arrayidx33 = getelementptr inbounds nuw i8 , ptr %tmp , i64 80
598
- %arrayidx46 = getelementptr inbounds nuw i8 , ptr %tmp , i64 96
599
- %arrayidx49 = getelementptr inbounds nuw i8 , ptr %tmp , i64 112
600
491
br label %for.body
601
492
602
493
for.cond.cleanup: ; preds = %for.body
603
- %add89 .lcssa = phi i32 [ %add89 , %for.body ]
604
- ret i32 %add89 .lcssa
494
+ %add .lcssa = phi i32 [ %add , %for.body ]
495
+ ret i32 %add .lcssa
605
496
606
497
for.body: ; preds = %entry, %for.body
607
498
%indvars.iv = phi i64 [ 0 , %entry ], [ %indvars.iv.next , %for.body ]
608
- %sum.0179 = phi i32 [ 0 , %entry ], [ %add89 , %for.body ]
499
+ %sum.0179 = phi i32 [ 0 , %entry ], [ %add , %for.body ]
609
500
%arrayidx1 = getelementptr inbounds nuw [4 x i32 ], ptr %tmp , i64 0 , i64 %indvars.iv
610
501
%0 = load i32 , ptr %arrayidx1 , align 4
611
- %arrayidx4 = getelementptr inbounds nuw [4 x i32 ], ptr %arrayidx2 , i64 0 , i64 %indvars.iv
612
- %1 = load i32 , ptr %arrayidx4 , align 4
613
- %add = add i32 %1 , %0
614
- %sub = sub i32 %0 , %1
615
- %arrayidx13 = getelementptr inbounds nuw [4 x i32 ], ptr %arrayidx11 , i64 0 , i64 %indvars.iv
616
- %2 = load i32 , ptr %arrayidx13 , align 4
617
- %arrayidx16 = getelementptr inbounds nuw [4 x i32 ], ptr %arrayidx14 , i64 0 , i64 %indvars.iv
618
- %3 = load i32 , ptr %arrayidx16 , align 4
619
- %add17 = add i32 %3 , %2
620
- %sub24 = sub i32 %2 , %3
621
- %add25 = add i32 %add17 , %add
622
- %sub26 = sub i32 %add , %add17
623
- %add27 = add i32 %sub24 , %sub
624
- %sub28 = sub i32 %sub , %sub24
625
- %arrayidx32 = getelementptr inbounds nuw [4 x i32 ], ptr %arrayidx30 , i64 0 , i64 %indvars.iv
626
- %4 = load i32 , ptr %arrayidx32 , align 4
627
- %arrayidx35 = getelementptr inbounds nuw [4 x i32 ], ptr %arrayidx33 , i64 0 , i64 %indvars.iv
628
- %5 = load i32 , ptr %arrayidx35 , align 4
629
- %add36 = add i32 %5 , %4
630
- %sub44 = sub i32 %4 , %5
631
- %arrayidx48 = getelementptr inbounds nuw [4 x i32 ], ptr %arrayidx46 , i64 0 , i64 %indvars.iv
632
- %6 = load i32 , ptr %arrayidx48 , align 4
633
- %arrayidx51 = getelementptr inbounds nuw [4 x i32 ], ptr %arrayidx49 , i64 0 , i64 %indvars.iv
634
- %7 = load i32 , ptr %arrayidx51 , align 4
635
- %add52 = add i32 %7 , %6
636
- %sub60 = sub i32 %6 , %7
637
- %add61 = add i32 %add52 , %add36
638
- %sub62 = sub i32 %add36 , %add52
639
- %add63 = add i32 %sub60 , %sub44
640
- %sub64 = sub i32 %sub44 , %sub60
641
- %add65 = add i32 %add61 , %add25
642
- %shr.i173 = lshr i32 %add65 , 15
643
- %and.i174 = and i32 %shr.i173 , 65537
644
- %mul.i175 = mul nuw i32 %and.i174 , 65535
645
- %add.i176 = add i32 %mul.i175 , %add65
646
- %xor.i177 = xor i32 %add.i176 , %mul.i175
647
- %sub66 = sub i32 %add25 , %add61
648
- %shr.i168 = lshr i32 %sub66 , 15
649
- %and.i169 = and i32 %shr.i168 , 65537
650
- %mul.i170 = mul nuw i32 %and.i169 , 65535
651
- %add.i171 = add i32 %mul.i170 , %sub66
652
- %xor.i172 = xor i32 %add.i171 , %mul.i170
653
- %add69 = add i32 %add63 , %add27
654
- %shr.i163 = lshr i32 %add69 , 15
655
- %and.i164 = and i32 %shr.i163 , 65537
656
- %mul.i165 = mul nuw i32 %and.i164 , 65535
657
- %add.i166 = add i32 %mul.i165 , %add69
658
- %xor.i167 = xor i32 %add.i166 , %mul.i165
659
- %sub71 = sub i32 %add27 , %add63
660
- %shr.i158 = lshr i32 %sub71 , 15
661
- %and.i159 = and i32 %shr.i158 , 65537
662
- %mul.i160 = mul nuw i32 %and.i159 , 65535
663
- %add.i161 = add i32 %mul.i160 , %sub71
664
- %xor.i162 = xor i32 %add.i161 , %mul.i160
665
- %add75 = add i32 %sub62 , %sub26
666
- %shr.i153 = lshr i32 %add75 , 15
667
- %and.i154 = and i32 %shr.i153 , 65537
668
- %mul.i155 = mul nuw i32 %and.i154 , 65535
669
- %add.i156 = add i32 %mul.i155 , %add75
670
- %xor.i157 = xor i32 %add.i156 , %mul.i155
671
- %sub77 = sub i32 %sub26 , %sub62
672
- %shr.i148 = lshr i32 %sub77 , 15
673
- %and.i149 = and i32 %shr.i148 , 65537
674
- %mul.i150 = mul nuw i32 %and.i149 , 65535
675
- %add.i151 = add i32 %mul.i150 , %sub77
676
- %xor.i152 = xor i32 %add.i151 , %mul.i150
677
- %add81 = add i32 %sub64 , %sub28
678
- %shr.i143 = lshr i32 %add81 , 15
679
- %and.i144 = and i32 %shr.i143 , 65537
680
- %mul.i145 = mul nuw i32 %and.i144 , 65535
681
- %add.i146 = add i32 %mul.i145 , %add81
682
- %xor.i147 = xor i32 %add.i146 , %mul.i145
683
- %sub83 = sub i32 %sub28 , %sub64
684
- %shr.i = lshr i32 %sub83 , 15
685
- %and.i = and i32 %shr.i , 65537
686
- %mul.i = mul nuw i32 %and.i , 65535
687
- %add.i = add i32 %mul.i , %sub83
688
- %xor.i = xor i32 %add.i , %mul.i
689
- %add73 = add i32 %xor.i147 , %xor.i
690
- %add68 = add i32 %add73 , %xor.i152
691
- %add74 = add i32 %add68 , %xor.i157
692
- %add79 = add i32 %add74 , %xor.i172
693
- %add80 = add i32 %add79 , %xor.i177
694
- %add85 = add i32 %add80 , %xor.i162
695
- %add86 = add i32 %add85 , %xor.i167
696
- %conv87 = and i32 %add86 , 65535
697
- %shr = lshr i32 %add86 , 16
698
- %add88 = add i32 %shr , %sum.0179
699
- %add89 = add i32 %add88 , %conv87
502
+ %add = add i32 %sum.0179 , %0
700
503
%indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
701
504
%exitcond.not = icmp eq i64 %indvars.iv.next , 4
702
505
br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
703
506
}
704
507
508
+ ; %N is a runtime value, so the trip count of 4 here is only an estimate taken from the latch branch weights (!prof !2); the expected output below keeps the scalar loop.
509
+ define i32 @tc4_from_profile (ptr noundef readonly captures(none) %tmp , i64 %N ) vscale_range(1 ,16 ) {
510
+ ; CHECK-LABEL: define i32 @tc4_from_profile(
511
+ ; CHECK-SAME: ptr noundef readonly captures(none) [[TMP:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
512
+ ; CHECK-NEXT: [[ENTRY:.*]]:
513
+ ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
514
+ ; CHECK: [[FOR_COND_CLEANUP:.*]]:
515
+ ; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ]
516
+ ; CHECK-NEXT: ret i32 [[TMP4]]
517
+ ; CHECK: [[FOR_BODY]]:
518
+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
519
+ ; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD]], %[[FOR_BODY]] ]
520
+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[INDVARS_IV]]
521
+ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
522
+ ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP0]]
523
+ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
524
+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
525
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7:![0-9]+]]
526
+ ;
527
+ entry:
528
+ br label %for.body
529
+
530
+ for.cond.cleanup: ; preds = %for.body
531
+ %add.lcssa = phi i32 [ %add , %for.body ]
532
+ ret i32 %add.lcssa
533
+
534
+ for.body: ; preds = %entry, %for.body
535
+ %indvars.iv = phi i64 [ 0 , %entry ], [ %indvars.iv.next , %for.body ]
536
+ %sum.0179 = phi i32 [ 0 , %entry ], [ %add , %for.body ]
537
+ %arrayidx1 = getelementptr inbounds nuw [4 x i32 ], ptr %tmp , i64 0 , i64 %indvars.iv
538
+ %0 = load i32 , ptr %arrayidx1 , align 4
539
+ %add = add i32 %sum.0179 , %0 ; running sum carried through the %sum.0179 phi
540
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
541
+ %exitcond.not = icmp eq i64 %indvars.iv.next , %N ; leave the loop once the IV reaches the runtime bound %N
542
+ br i1 %exitcond.not , label %for.cond.cleanup , label %for.body , !prof !2 ; !2 weights: 10 on the exit edge, 30 on the backedge, i.e. about 4 iterations per loop entry
543
+ }
544
+
705
545
706
546
!0 = distinct !{!0 , !1 }
707
547
!1 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true }
548
+ !2 = !{!"branch_weights" , i32 10 , i32 30 } ; latch weights for @tc4_from_profile: 10 for the true (exit) edge, 30 for the false (backedge) edge
549
+
708
550
;.
709
551
; CHECK-VS1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
710
552
; CHECK-VS1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -713,6 +555,7 @@ for.body: ; preds = %entry, %for.body
713
555
; CHECK-VS1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
714
556
; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
715
557
; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
558
+ ; CHECK-VS1: [[PROF7]] = !{!"branch_weights", i32 10, i32 30}
716
559
;.
717
560
; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
718
561
; CHECK-VS2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -721,4 +564,5 @@ for.body: ; preds = %entry, %for.body
721
564
; CHECK-VS2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
722
565
; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
723
566
; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
567
+ ; CHECK-VS2: [[PROF7]] = !{!"branch_weights", i32 10, i32 30}
724
568
;.
0 commit comments