@@ -18,7 +18,7 @@ target triple = "aarch64-unknown-linux-gnu"
18
18
19
19
; DEBUG-LABEL: LV: Checking a loop in 'trip_count_too_small'
20
20
; DEBUG: LV: Found a loop with a very small trip count. This loop is worth vectorizing only if no scalar iteration overheads are incurred.
21
- ; DEBUG: LV: Not vectorizing: The trip count is below the minial threshold value. .
21
+ ; DEBUG: LV: Not vectorizing: Runtime SCEV check is required with -Os/-Oz .
22
22
23
23
; DEBUG-LABEL: LV: Checking a loop in 'too_many_runtime_checks'
24
24
; DEBUG: LV: Found trip count: 0
@@ -490,9 +490,103 @@ while.end:
490
490
ret void
491
491
}
492
492
493
+ ; The loop below has a constant trip count of 4 (its exit compare is against the literal 4); the assertions that follow expect it to vectorize with vf==4, i.e. with fixed-width <4 x i32> operations.
494
+ define i32 @tc4 (ptr noundef readonly captures(none) %tmp ) vscale_range(1 ,16 ) {
495
+ ; CHECK-LABEL: define i32 @tc4(
496
+ ; CHECK-SAME: ptr noundef readonly captures(none) [[TMP:%.*]]) #[[ATTR1]] {
497
+ ; CHECK-NEXT: [[ENTRY:.*]]:
498
+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
499
+ ; CHECK: [[VECTOR_PH]]:
500
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
501
+ ; CHECK: [[VECTOR_BODY]]:
502
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
503
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
504
+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[INDEX]]
505
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX1]], i32 0
506
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
507
+ ; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
508
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
509
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
510
+ ; CHECK: [[MIDDLE_BLOCK]]:
511
+ ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
512
+ ; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
513
+ ; CHECK: [[SCALAR_PH]]:
514
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
515
+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
516
+ ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
517
+ ; CHECK: [[FOR_COND_CLEANUP]]:
518
+ ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
519
+ ; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
520
+ ; CHECK: [[FOR_BODY]]:
521
+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
522
+ ; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD]], %[[FOR_BODY]] ]
523
+ ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[INDVARS_IV]]
524
+ ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
525
+ ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP5]]
526
+ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
527
+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
528
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
529
+ ;
530
+ entry:
531
+ br label %for.body
532
+
533
+ for.cond.cleanup: ; preds = %for.body
534
+ %add.lcssa = phi i32 [ %add , %for.body ] ; value of %add on loop exit, i.e. the final sum
535
+ ret i32 %add.lcssa
536
+
537
+ for.body: ; preds = %entry, %for.body
538
+ %indvars.iv = phi i64 [ 0 , %entry ], [ %indvars.iv.next , %for.body ] ; element index, starts at 0
539
+ %sum.0179 = phi i32 [ 0 , %entry ], [ %add , %for.body ] ; running i32 sum, starts at 0
540
+ %arrayidx1 = getelementptr inbounds nuw [4 x i32 ], ptr %tmp , i64 0 , i64 %indvars.iv ; address of element %indvars.iv of the [4 x i32] array at %tmp
541
+ %0 = load i32 , ptr %arrayidx1 , align 4
542
+ %add = add i32 %sum.0179 , %0 ; accumulate the loaded element
543
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
544
+ %exitcond.not = icmp eq i64 %indvars.iv.next , 4 ; true once the incremented index reaches the constant bound 4
545
+ br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
546
+ }
547
+
548
+ ; The trip count %N is not a compile-time constant here; the !prof branch weights on the loop branch (10 for the exit edge, 30 for the back-edge) give an estimated trip count of 4. The assertions that follow expect this loop to stay scalar.
549
+ define i32 @tc4_from_profile (ptr noundef readonly captures(none) %tmp , i64 %N ) vscale_range(1 ,16 ) {
550
+ ; CHECK-LABEL: define i32 @tc4_from_profile(
551
+ ; CHECK-SAME: ptr noundef readonly captures(none) [[TMP:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
552
+ ; CHECK-NEXT: [[ENTRY:.*]]:
553
+ ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
554
+ ; CHECK: [[FOR_COND_CLEANUP:.*]]:
555
+ ; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ]
556
+ ; CHECK-NEXT: ret i32 [[TMP4]]
557
+ ; CHECK: [[FOR_BODY]]:
558
+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
559
+ ; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD]], %[[FOR_BODY]] ]
560
+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[INDVARS_IV]]
561
+ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
562
+ ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP0]]
563
+ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
564
+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
565
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]]
566
+ ;
567
+ entry:
568
+ br label %for.body
569
+
570
+ for.cond.cleanup: ; preds = %for.body
571
+ %add.lcssa = phi i32 [ %add , %for.body ] ; value of %add on loop exit, i.e. the final sum
572
+ ret i32 %add.lcssa
573
+
574
+ for.body: ; preds = %entry, %for.body
575
+ %indvars.iv = phi i64 [ 0 , %entry ], [ %indvars.iv.next , %for.body ] ; element index, starts at 0
576
+ %sum.0179 = phi i32 [ 0 , %entry ], [ %add , %for.body ] ; running i32 sum, starts at 0
577
+ %arrayidx1 = getelementptr inbounds nuw [4 x i32 ], ptr %tmp , i64 0 , i64 %indvars.iv ; address of element %indvars.iv of the [4 x i32] array at %tmp
578
+ %0 = load i32 , ptr %arrayidx1 , align 4
579
+ %add = add i32 %sum.0179 , %0 ; accumulate the loaded element
580
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
581
+ %exitcond.not = icmp eq i64 %indvars.iv.next , %N ; bound is the runtime argument %N, not a constant
582
+ br i1 %exitcond.not , label %for.cond.cleanup , label %for.body , !prof !2 ; !2 supplies the profile weights for (exit, back-edge)
583
+ }
584
+
493
585
494
586
!0 = distinct !{!0 , !1 }
495
587
!1 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true }
588
+ !2 = !{!"branch_weights" , i32 10 , i32 30 } ; used via !prof by the loop branch in @tc4_from_profile: weight 10 for the exit edge, 30 for the back-edge
589
+
496
590
;.
497
591
; CHECK-VS1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
498
592
; CHECK-VS1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -501,6 +595,9 @@ while.end:
501
595
; CHECK-VS1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
502
596
; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
503
597
; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
598
+ ; CHECK-VS1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
599
+ ; CHECK-VS1: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
600
+ ; CHECK-VS1: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
504
601
;.
505
602
; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
506
603
; CHECK-VS2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -509,4 +606,7 @@ while.end:
509
606
; CHECK-VS2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
510
607
; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
511
608
; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
609
+ ; CHECK-VS2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
610
+ ; CHECK-VS2: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
611
+ ; CHECK-VS2: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
512
612
;.
0 commit comments