@@ -875,3 +875,115 @@ if.end:
875
875
%exitcond.not = icmp eq i64 %indvars.iv.next , %wide.trip.count
876
876
br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
877
877
}
878
+
879
+ ; External function called from the inner loop of @outer_latch_heuristic; only
+ ; its signature is declared here.
+ declare i64 @payload (i64 , ptr , ptr , i64 )
880
+
881
+ ; Two-level loop nest whose only select sits in the block that branches back to
+ ; the outer loop header (the outer loop's latch).
+ ; NOTE(review): the name suggests this exercises a select-optimization heuristic
+ ; for outer-loop latches; the RUN lines are outside this chunk -- confirm there.
+ define void @outer_latch_heuristic (ptr %dst , ptr %src , i64 %p , i64 %dim ) {
882
+ ; Expected output for the "CHECKOO" prefix. The select in latch is expected to
+ ; remain a select instruction.
+ ; NOTE(review): SELECT_END below captures the back-edge predecessor of the
+ ; outer-loop phis and the inner loop's exit target, yet the only label checked
+ ; after inner.loop is `latch:`; the "CHECKII" block names the same capture
+ ; LATCH. The capture name does not affect matching, but it reads as if a
+ ; select.end block were expected -- TODO confirm whether it is stale.
+ ; CHECKOO-LABEL: @outer_latch_heuristic(
883
+ ; CHECKOO-NEXT: entry:
884
+ ; CHECKOO-NEXT: br label [[OUTER_LOOP:%.*]]
885
+ ; CHECKOO: outer.loop:
886
+ ; CHECKOO-NEXT: [[K_020_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[SELECT_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
887
+ ; CHECKOO-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[SELECT_END]] ], [ 0, [[ENTRY]] ]
888
+ ; CHECKOO-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[SELECT_END]] ], [ 0, [[ENTRY]] ]
889
+ ; CHECKOO-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
890
+ ; CHECKOO-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_US]], align 8
891
+ ; CHECKOO-NEXT: [[ARRAYIDX1_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
892
+ ; CHECKOO-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX1_US]], align 8
893
+ ; CHECKOO-NEXT: br label [[INNER_LOOP:%.*]]
894
+ ; CHECKOO: inner.loop:
895
+ ; CHECKOO-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[DIM:%.*]], [[OUTER_LOOP]] ], [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ]
896
+ ; CHECKOO-NEXT: [[DIFF_04_I_US:%.*]] = phi i64 [ [[CALL_I_US:%.*]], [[INNER_LOOP]] ], [ 0, [[OUTER_LOOP]] ]
897
+ ; CHECKOO-NEXT: [[CALL_I_US]] = tail call i64 @payload(i64 [[DIFF_04_I_US]], ptr [[TMP0]], ptr [[TMP1]], i64 [[P:%.*]])
898
+ ; CHECKOO-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
899
+ ; CHECKOO-NEXT: [[EXITCOND_NOT_I_US:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
900
+ ; CHECKOO-NEXT: br i1 [[EXITCOND_NOT_I_US]], label [[SELECT_END]], label [[INNER_LOOP]]
901
+ ; CHECKOO: latch:
902
+ ; CHECKOO-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[CALL_I_US]], -1
903
+ ; CHECKOO-NEXT: [[DIFF_0_LCSSA_I_LOBIT_US:%.*]] = lshr i64 [[CALL_I_US]], 63
904
+ ; CHECKOO-NEXT: [[I_NEXT]] = add nsw i64 [[DIFF_0_LCSSA_I_LOBIT_US]], [[I]]
905
+ ; CHECKOO-NEXT: [[INC4_US:%.*]] = zext i1 [[CMP2_US]] to i64
906
+ ; CHECKOO-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[INC4_US]]
907
+ ; CHECKOO-NEXT: [[COND_IN_US:%.*]] = select i1 [[CMP2_US]], ptr [[ARRAYIDX1_US]], ptr [[ARRAYIDX_US]]
908
+ ; CHECKOO-NEXT: [[COND_US:%.*]] = load ptr, ptr [[COND_IN_US]], align 8
909
+ ; CHECKOO-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[K_020_US]]
910
+ ; CHECKOO-NEXT: store ptr [[COND_US]], ptr [[ARRAYIDX6_US]], align 8
911
+ ; CHECKOO-NEXT: [[INC7_US]] = add i64 [[K_020_US]], 1
912
+ ; CHECKOO-NEXT: [[EXITCOND23_NOT:%.*]] = icmp eq i64 [[K_020_US]], 1000
913
+ ; CHECKOO-NEXT: br i1 [[EXITCOND23_NOT]], label [[EXIT:%.*]], label [[OUTER_LOOP]]
914
+ ; CHECKOO: exit:
915
+ ; CHECKOO-NEXT: ret void
916
+ ;
917
+ ; Expected output for the "CHECKII" prefix. It matches the first block line for
+ ; line apart from using LATCH as the capture name for the back-edge block, and
+ ; likewise expects the select in latch to remain a select instruction.
+ ; CHECKII-LABEL: @outer_latch_heuristic(
918
+ ; CHECKII-NEXT: entry:
919
+ ; CHECKII-NEXT: br label [[OUTER_LOOP:%.*]]
920
+ ; CHECKII: outer.loop:
921
+ ; CHECKII-NEXT: [[K_020_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
922
+ ; CHECKII-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
923
+ ; CHECKII-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
924
+ ; CHECKII-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
925
+ ; CHECKII-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_US]], align 8
926
+ ; CHECKII-NEXT: [[ARRAYIDX1_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
927
+ ; CHECKII-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX1_US]], align 8
928
+ ; CHECKII-NEXT: br label [[INNER_LOOP:%.*]]
929
+ ; CHECKII: inner.loop:
930
+ ; CHECKII-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[DIM:%.*]], [[OUTER_LOOP]] ], [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ]
931
+ ; CHECKII-NEXT: [[DIFF_04_I_US:%.*]] = phi i64 [ [[CALL_I_US:%.*]], [[INNER_LOOP]] ], [ 0, [[OUTER_LOOP]] ]
932
+ ; CHECKII-NEXT: [[CALL_I_US]] = tail call i64 @payload(i64 [[DIFF_04_I_US]], ptr [[TMP0]], ptr [[TMP1]], i64 [[P:%.*]])
933
+ ; CHECKII-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
934
+ ; CHECKII-NEXT: [[EXITCOND_NOT_I_US:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
935
+ ; CHECKII-NEXT: br i1 [[EXITCOND_NOT_I_US]], label [[LATCH]], label [[INNER_LOOP]]
936
+ ; CHECKII: latch:
937
+ ; CHECKII-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[CALL_I_US]], -1
938
+ ; CHECKII-NEXT: [[DIFF_0_LCSSA_I_LOBIT_US:%.*]] = lshr i64 [[CALL_I_US]], 63
939
+ ; CHECKII-NEXT: [[I_NEXT]] = add nsw i64 [[DIFF_0_LCSSA_I_LOBIT_US]], [[I]]
940
+ ; CHECKII-NEXT: [[INC4_US:%.*]] = zext i1 [[CMP2_US]] to i64
941
+ ; CHECKII-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[INC4_US]]
942
+ ; CHECKII-NEXT: [[COND_IN_US:%.*]] = select i1 [[CMP2_US]], ptr [[ARRAYIDX1_US]], ptr [[ARRAYIDX_US]]
943
+ ; CHECKII-NEXT: [[COND_US:%.*]] = load ptr, ptr [[COND_IN_US]], align 8
944
+ ; CHECKII-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[K_020_US]]
945
+ ; CHECKII-NEXT: store ptr [[COND_US]], ptr [[ARRAYIDX6_US]], align 8
946
+ ; CHECKII-NEXT: [[INC7_US]] = add i64 [[K_020_US]], 1
947
+ ; CHECKII-NEXT: [[EXITCOND23_NOT:%.*]] = icmp eq i64 [[K_020_US]], 1000
948
+ ; CHECKII-NEXT: br i1 [[EXITCOND23_NOT]], label [[EXIT:%.*]], label [[OUTER_LOOP]]
949
+ ; CHECKII: exit:
950
+ ; CHECKII-NEXT: ret void
951
+ ;
952
+ entry:
953
+ br label %outer.loop
954
+
955
+ ; Outer loop header: %k.020.us indexes %dst, while %i and %j both index %src.
+ ; All three start at 0 and are updated in latch.
+ outer.loop:
956
+ %k.020.us = phi i64 [ %inc7.us , %latch ], [ 0 , %entry ]
957
+ %j = phi i64 [ %j.next , %latch ], [ 0 , %entry ]
958
+ %i = phi i64 [ %i.next , %latch ], [ 0 , %entry ]
959
+ %arrayidx.us = getelementptr inbounds ptr , ptr %src , i64 %i
960
+ ; %4 and %5 are the pointers stored at %src[%i] and %src[%j]; both are passed
+ ; to every @payload call in the inner loop.
+ %4 = load ptr , ptr %arrayidx.us , align 8
961
+ %arrayidx1.us = getelementptr inbounds ptr , ptr %src , i64 %j
962
+ %5 = load ptr , ptr %arrayidx1.us , align 8
963
+ br label %inner.loop
964
+
965
+ ; Inner loop: feeds the previous @payload result back in as the first argument
+ ; (0 on the first iteration) and counts %lsr.iv down from %dim, leaving the
+ ; loop once the decremented value equals 0.
+ inner.loop:
966
+ %lsr.iv = phi i64 [ %dim , %outer.loop ], [ %lsr.iv.next , %inner.loop ]
967
+ %diff.04.i.us = phi i64 [ %call.i.us , %inner.loop ], [ 0 , %outer.loop ]
968
+ %call.i.us = tail call i64 @payload (i64 %diff.04.i.us , ptr %4 , ptr %5 , i64 %p )
969
+ %lsr.iv.next = add i64 %lsr.iv , -1
970
+ %exitcond.not.i.us = icmp eq i64 %lsr.iv.next , 0
971
+ br i1 %exitcond.not.i.us , label %latch , label %inner.loop
972
+
973
+ ; Outer latch: the sign of the last @payload result picks which index advances
+ ; and which %src slot is copied to %dst[%k.020.us].
+ latch:
974
+ ; %cmp2.us is true when the result is non-negative (> -1).
+ %cmp2.us = icmp sgt i64 %call.i.us , -1
975
+ ; Logical shift by 63 leaves just the sign bit: 1 for a negative result, else 0.
+ %diff.0.lcssa.i.lobit.us = lshr i64 %call.i.us , 63
976
+ ; %i advances on a negative result; %j advances on a non-negative one.
+ %i.next = add nsw i64 %diff.0.lcssa.i.lobit.us , %i
977
+ %inc4.us = zext i1 %cmp2.us to i64
978
+ %j.next = add nsw i64 %j , %inc4.us
979
+ ; The select under test: chooses the address of %src[%j] when the result is
+ ; non-negative, otherwise the address of %src[%i].
+ %cond.in.us = select i1 %cmp2.us , ptr %arrayidx1.us , ptr %arrayidx.us
980
+ %cond.us = load ptr , ptr %cond.in.us , align 8
981
+ %arrayidx6.us = getelementptr inbounds ptr , ptr %dst , i64 %k.020.us
982
+ store ptr %cond.us , ptr %arrayidx6.us , align 8
983
+ %inc7.us = add i64 %k.020.us , 1
984
+ ; The exit test compares the pre-increment %k.020.us (not %inc7.us) with 1000,
+ ; so the latch also runs for the iteration where %k.020.us is 1000.
+ %exitcond23.not = icmp eq i64 %k.020.us , 1000
985
+ br i1 %exitcond23.not , label %exit , label %outer.loop
986
+
987
+ exit:
988
+ ret void
989
+ }
0 commit comments