@@ -555,3 +555,195 @@ for.end17: ; preds = %for.inc15
555
555
; CHECK: for.end17: ; preds = %for.inc15
556
556
; CHECK: ret void
557
557
558
+ ;;-----------------------------------Test case 09-------------------------------
559
+ ;; Test that a flow dependency in the outer loop doesn't prevent interchange in
560
+ ;; loops i and j.
561
+ ;;
562
+ ;; for (int k = 0; k < 100; ++k) {
563
+ ;; T[k] = fn1();
564
+ ;; for (int i = 0; i < 1000; ++i)
565
+ ;; for(int j = 1; j < 1000; ++j)
566
+ ;; Arr[j][i] = Arr[j][i]+k;
567
+ ;; fn2(T[k]);
568
+ ;; }
569
+
570
+ @T = internal global [100 x double ] zeroinitializer , align 4
571
+ @Arr = internal global [1000 x [1000 x i32 ]] zeroinitializer , align 4
572
+
573
+ define void @interchange_09 (i32 %k ) { ; three-deep loop nest (outer/middle/inner); the %k argument is never referenced in the body
574
+ entry:
575
+ br label %for.body ; fall straight into the outer loop header
576
+
577
+ for.cond.cleanup: ; preds = %for.cond.cleanup4
578
+ ret void ; reached once %indvars.iv.next46 equals 100
579
+
580
+ for.body: ; preds = %for.cond.cleanup4, %entry
581
+ %indvars.iv45 = phi i64 [ 0 , %entry ], [ %indvars.iv.next46 , %for.cond.cleanup4 ] ; outer induction variable, starts at 0
582
+ %call = call double @fn1 () ; result is stored into @T just below
583
+ %arrayidx = getelementptr inbounds [100 x double ], [100 x double ]* @T , i64 0 , i64 %indvars.iv45 ; address of @T[%indvars.iv45]
584
+ store double %call , double * %arrayidx , align 8 ; same address is reloaded in %for.cond.cleanup4. NOTE(review): @T is declared align 4 but this access claims align 8 - confirm intended
585
+ br label %for.cond6.preheader ; enter the middle loop
586
+
587
+ for.cond6.preheader: ; preds = %for.cond.cleanup8, %for.body
588
+ %indvars.iv42 = phi i64 [ 0 , %for.body ], [ %indvars.iv.next43 , %for.cond.cleanup8 ] ; middle induction variable, starts at 0
589
+ br label %for.body9 ; enter the inner loop
590
+
591
+ for.cond.cleanup4: ; preds = %for.cond.cleanup8
592
+ %tmp = load double , double * %arrayidx , align 8 ; reads back the @T element written in %for.body (same %arrayidx)
593
+ call void @fn2 (double %tmp )
594
+ %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45 , 1 ; advance the outer induction variable
595
+ %exitcond47 = icmp ne i64 %indvars.iv.next46 , 100 ; true while the outer loop has iterations left
596
+ br i1 %exitcond47 , label %for.body , label %for.cond.cleanup ; outer back edge, or leave the nest
597
+
598
+ for.cond.cleanup8: ; preds = %for.body9
599
+ %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42 , 1 ; advance the middle induction variable
600
+ %exitcond44 = icmp ne i64 %indvars.iv.next43 , 1000 ; true while the middle loop has iterations left
601
+ br i1 %exitcond44 , label %for.cond6.preheader , label %for.cond.cleanup4 ; middle back edge, or fall out to the outer latch
602
+
603
+ for.body9: ; preds = %for.body9, %for.cond6.preheader
604
+ %indvars.iv = phi i64 [ 1 , %for.cond6.preheader ], [ %indvars.iv.next , %for.body9 ] ; inner induction variable, starts at 1 (not 0)
605
+ %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32 ]], [1000 x [1000 x i32 ]]* @Arr , i64 0 , i64 %indvars.iv , i64 %indvars.iv42 ; &@Arr[inner][middle]: the inner variable is the first subscript, the middle variable the second
606
+ %tmp1 = load i32 , i32* %arrayidx13 , align 4
607
+ %tmp2 = trunc i64 %indvars.iv45 to i32 ; outer induction variable narrowed to i32 for the add
608
+ %add = add nsw i32 %tmp1 , %tmp2 ; loaded element plus the outer induction value
609
+ store i32 %add , i32* %arrayidx13 , align 4 ; written back to the address it was loaded from
610
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv , 1 ; advance the inner induction variable
611
+ %exitcond = icmp ne i64 %indvars.iv.next , 1000 ; true while the inner loop has iterations left
612
+ br i1 %exitcond , label %for.body9 , label %for.cond.cleanup8 ; inner back edge, or fall out to the middle latch
613
+ }
614
+
615
+ declare double @fn1 ()
616
+ declare void @fn2 (double )
617
+
618
+
619
+
620
+
621
+
622
+ ;; After interchange %indvars.iv (j) should increment as the middle loop.
623
+ ;; After interchange %indvars.iv42 (i) should increment as the innermost loop.
624
+
625
+ ; CHECK-LABEL: @interchange_09
626
+
627
+ ; CHECK: for.body:
628
+ ; CHECK: %indvars.iv45 = phi i64 [ %indvars.iv.next46, %for.cond.cleanup4 ], [ 0, %for.body.preheader ]
629
+ ; CHECK: %call = call double @fn1()
630
+ ; CHECK: %arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
631
+ ; CHECK: store double %call, double* %arrayidx, align 8
632
+ ; CHECK: br label %for.body9.preheader
633
+
634
+ ; CHECK: for.cond6.preheader.preheader:
635
+ ; CHECK: br label %for.cond6.preheader
636
+
637
+ ; CHECK: for.cond6.preheader:
638
+ ; CHECK: %indvars.iv42 = phi i64 [ %indvars.iv.next43, %for.cond.cleanup8 ], [ 0, %for.cond6.preheader.preheader ]
639
+ ; CHECK: br label %for.body9.split1
640
+
641
+ ; CHECK: for.body9.preheader:
642
+ ; CHECK: br label %for.body9
643
+
644
+ ; CHECK: for.cond.cleanup4:
645
+ ; CHECK: %tmp = load double, double* %arrayidx, align 8
646
+ ; CHECK: call void @fn2(double %tmp)
647
+ ; CHECK: %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
648
+ ; CHECK: %exitcond47 = icmp ne i64 %indvars.iv.next46, 100
649
+ ; CHECK: br i1 %exitcond47, label %for.body, label %for.cond.cleanup
650
+
651
+ ; CHECK: for.cond.cleanup8:
652
+ ; CHECK: %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
653
+ ; CHECK: %exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
654
+ ; CHECK: br i1 %exitcond44, label %for.cond6.preheader, label %for.body9.split
655
+
656
+ ; CHECK: for.body9:
657
+ ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9.split ], [ 1, %for.body9.preheader ]
658
+ ; CHECK: br label %for.cond6.preheader.preheader
659
+
660
+ ; CHECK: for.body9.split1:
661
+ ; CHECK: %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
662
+ ; CHECK: store i32 %add, i32* %arrayidx13, align 4
663
+ ; CHECK: br label %for.cond.cleanup8
664
+
665
+ ; CHECK: for.body9.split:
666
+ ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
667
+ ; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 1000
668
+ ; CHECK: br i1 %exitcond, label %for.body9, label %for.cond.cleanup4
669
+
670
+
671
+ ;;-----------------------------------Test case 10-------------------------------
672
+ ;; Test to make sure we can handle output dependencies.
673
+ ;;
674
+ ;; for (int i = 0; i < 2; ++i)
675
+ ;; for(int j = 0; j < 3; ++j) {
676
+ ;; A10[j][i] = i;
677
+ ;; A10[j][i+1] = j;
678
+ ;; }
679
+
680
+ @A10 = local_unnamed_addr global [3 x [3 x i32 ]] zeroinitializer , align 16
681
+
682
+ define void @interchange_10 () { ; two-deep loop nest that stores to two elements of @A10 per inner iteration
683
+ entry:
684
+ br label %for.cond1.preheader ; enter the outer loop header
685
+
686
+ for.cond.loopexit: ; preds = %for.body4
687
+ %exitcond28 = icmp ne i64 %indvars.iv.next27 , 2 ; %indvars.iv.next27 is defined in %for.cond1.preheader, not in this block
688
+ br i1 %exitcond28 , label %for.cond1.preheader , label %for.cond.cleanup ; outer back edge, or leave the nest
689
+
690
+ for.cond1.preheader: ; preds = %for.cond.loopexit, %entry
691
+ %indvars.iv26 = phi i64 [ 0 , %entry ], [ %indvars.iv.next27 , %for.cond.loopexit ] ; outer induction variable, starts at 0
692
+ %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26 , 1 ; computed in the header because it is also the second subscript of %arrayidx10
693
+ br label %for.body4 ; enter the inner loop
694
+
695
+ for.cond.cleanup: ; preds = %for.cond.loopexit
696
+ ret void ; reached once %indvars.iv.next27 equals 2
697
+
698
+ for.body4: ; preds = %for.body4, %for.cond1.preheader
699
+ %indvars.iv = phi i64 [ 0 , %for.cond1.preheader ], [ %indvars.iv.next , %for.body4 ] ; inner induction variable, starts at 0
700
+ %arrayidx6 = getelementptr inbounds [3 x [3 x i32 ]], [3 x [3 x i32 ]]* @A10 , i64 0 , i64 %indvars.iv , i64 %indvars.iv26 ; &@A10[inner][outer]
701
+ %tmp = trunc i64 %indvars.iv26 to i32 ; outer induction variable narrowed to i32
702
+ store i32 %tmp , i32* %arrayidx6 , align 4 ; first store: writes the outer induction value
703
+ %arrayidx10 = getelementptr inbounds [3 x [3 x i32 ]], [3 x [3 x i32 ]]* @A10 , i64 0 , i64 %indvars.iv , i64 %indvars.iv.next27 ; &@A10[inner][outer + 1]: the element the first store targets on the next outer iteration
704
+ %tmp1 = trunc i64 %indvars.iv to i32 ; inner induction variable narrowed to i32
705
+ store i32 %tmp1 , i32* %arrayidx10 , align 4 ; second store: writes the inner induction value
706
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv , 1 ; advance the inner induction variable
707
+ %exitcond = icmp ne i64 %indvars.iv.next , 3 ; true while the inner loop has iterations left
708
+ br i1 %exitcond , label %for.body4 , label %for.cond.loopexit ; inner back edge, or fall out to the outer latch
709
+ }
710
+
711
+ ; CHECK-LABEL: @interchange_10
712
+ ; CHECK: entry:
713
+ ; CHECK: br label %for.body4.preheader
714
+
715
+ ; CHECK: for.cond1.preheader.preheader:
716
+ ; CHECK: br label %for.cond1.preheader
717
+
718
+ ; CHECK: for.cond.loopexit:
719
+ ; CHECK: %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
720
+ ; CHECK: br i1 %exitcond28, label %for.cond1.preheader, label %for.body4.split
721
+
722
+ ; CHECK: for.cond1.preheader:
723
+ ; CHECK: %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond.loopexit ], [ 0, %for.cond1.preheader.preheader ]
724
+ ; CHECK: %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
725
+ ; CHECK: br label %for.body4.split1
726
+
727
+ ; CHECK: for.body4.preheader:
728
+ ; CHECK: br label %for.body4
729
+
730
+ ; CHECK: for.cond.cleanup:
731
+ ; CHECK: ret void
732
+
733
+ ; CHECK: for.body4:
734
+ ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %for.body4.preheader ]
735
+ ; CHECK: br label %for.cond1.preheader.preheader
736
+
737
+ ; CHECK: for.body4.split1:
738
+ ; CHECK: %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
739
+ ; CHECK: %tmp = trunc i64 %indvars.iv26 to i32
740
+ ; CHECK: store i32 %tmp, i32* %arrayidx6, align 4
741
+ ; CHECK: %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
742
+ ; CHECK: %tmp1 = trunc i64 %indvars.iv to i32
743
+ ; CHECK: store i32 %tmp1, i32* %arrayidx10, align 4
744
+ ; CHECK: br label %for.cond.loopexit
745
+
746
+ ; CHECK: for.body4.split:
747
+ ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
748
+ ; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 3
749
+ ; CHECK: br i1 %exitcond, label %for.body4, label %for.cond.cleanup
0 commit comments