@@ -15,83 +15,39 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
15
15
; CHECK: [[FOR_BODY_PREHEADER]]:
16
16
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
17
17
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[TMP0]]
18
- ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 3
19
18
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
20
- ; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA :.*]], label %[[FOR_BODY_PREHEADER_NEW :.*]]
21
- ; CHECK: [[FOR_BODY_PREHEADER_NEW ]]:
22
- ; CHECK-NEXT: [[UNROLL_ITER :%.*]] = and i64 [[TMP0]], 2147483644
19
+ ; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14 :.*]], label %[[VECTOR_PH :.*]]
20
+ ; CHECK: [[FOR_BODY_PREHEADER14 ]]:
21
+ ; CHECK-NEXT: [[INDVARS_IV_PH :%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER:%.*]], %[[MIDDLE_BLOCK:.*]] ]
23
22
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
24
- ; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]:
25
- ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_3:%.*]], %[[FOR_BODY]] ]
26
- ; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
27
- ; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_EPIL:.*]]
28
- ; CHECK: [[FOR_BODY_EPIL]]:
29
- ; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
30
- ; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], %[[FOR_BODY_EPIL]] ], [ 0, %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
23
+ ; CHECK: [[VECTOR_PH]]:
24
+ ; CHECK-NEXT: [[UNROLL_ITER]] = and i64 [[TMP0]], 2147483644
25
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
26
+ ; CHECK: [[VECTOR_BODY]]:
27
+ ; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
31
28
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
32
- ; CHECK-NEXT: [[TMP2 :%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
29
+ ; CHECK-NEXT: [[WIDE_LOAD :%.*]] = load <4 x i32> , ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
33
30
; CHECK-NEXT: [[GEP_EPIL:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
34
- ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_EPIL]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
35
- ; CHECK-NEXT: [[IDXPROM3_EPIL:%.*]] = sext i32 [[TMP2]] to i64
36
- ; CHECK-NEXT: [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_EPIL]]
37
- ; CHECK-NEXT: [[IDXPROM5_EPIL:%.*]] = sext i32 [[TMP3]] to i64
38
- ; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_EPIL]]
39
- ; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX4_EPIL]], align 8
40
- ; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX6_EPIL]], align 8
41
- ; CHECK-NEXT: [[CMP_I_EPIL:%.*]] = fcmp fast olt double [[TMP4]], [[TMP5]]
42
- ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[CMP_I_EPIL]], double [[TMP5]], double [[TMP4]]
43
- ; CHECK-NEXT: store double [[TMP6]], ptr [[ARRAYIDX4_EPIL]], align 8, !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
44
- ; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
45
- ; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
46
- ; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
47
- ; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_EPIL]], !llvm.loop [[LOOP7:![0-9]+]]
31
+ ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[GEP_EPIL]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
32
+ ; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
33
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
34
+ ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64>
35
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> [[TMP5]]
36
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !llvm.access.group [[ACC_GRP4]]
37
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !llvm.access.group [[ACC_GRP4]]
38
+ ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
39
+ ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
40
+ ; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
41
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
42
+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
43
+ ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
44
+ ; CHECK: [[MIDDLE_BLOCK]]:
45
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]]
46
+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
48
47
; CHECK: [[FOR_COND_CLEANUP]]:
49
48
; CHECK-NEXT: ret void
50
49
; CHECK: [[FOR_BODY]]:
51
- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3]], %[[FOR_BODY]] ]
52
- ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_BODY]] ]
53
- ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV]]
54
- ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
55
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]]
56
- ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
57
- ; CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP7]] to i64
58
- ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3]]
59
- ; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP8]] to i64
60
- ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5]]
61
- ; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
62
- ; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[ARRAYIDX6]], align 8
63
- ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[TMP9]], [[TMP10]]
64
- ; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[CMP_I]], double [[TMP10]], double [[TMP9]]
65
- ; CHECK-NEXT: store double [[TMP11]], ptr [[ARRAYIDX4]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
66
- ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
67
- ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT]]
68
- ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
69
- ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT]]
70
- ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[GEP_1]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
71
- ; CHECK-NEXT: [[IDXPROM3_1:%.*]] = sext i32 [[TMP12]] to i64
72
- ; CHECK-NEXT: [[ARRAYIDX4_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_1]]
73
- ; CHECK-NEXT: [[IDXPROM5_1:%.*]] = sext i32 [[TMP13]] to i64
74
- ; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_1]]
75
- ; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX4_1]], align 8
76
- ; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX6_1]], align 8
77
- ; CHECK-NEXT: [[CMP_I_1:%.*]] = fcmp fast olt double [[TMP14]], [[TMP15]]
78
- ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[CMP_I_1]], double [[TMP15]], double [[TMP14]]
79
- ; CHECK-NEXT: store double [[TMP16]], ptr [[ARRAYIDX4_1]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
80
- ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
81
- ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_1]]
82
- ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
83
- ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_1]]
84
- ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[GEP_2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
85
- ; CHECK-NEXT: [[IDXPROM3_2:%.*]] = sext i32 [[TMP17]] to i64
86
- ; CHECK-NEXT: [[ARRAYIDX4_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_2]]
87
- ; CHECK-NEXT: [[IDXPROM5_2:%.*]] = sext i32 [[TMP18]] to i64
88
- ; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_2]]
89
- ; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX4_2]], align 8
90
- ; CHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[ARRAYIDX6_2]], align 8
91
- ; CHECK-NEXT: [[CMP_I_2:%.*]] = fcmp fast olt double [[TMP19]], [[TMP20]]
92
- ; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[CMP_I_2]], double [[TMP20]], double [[TMP19]]
93
- ; CHECK-NEXT: store double [[TMP21]], ptr [[ARRAYIDX4_2]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
94
- ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
50
+ ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ]
95
51
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
96
52
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
97
53
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
@@ -100,15 +56,14 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
100
56
; CHECK-NEXT: [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
101
57
; CHECK-NEXT: [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64
102
58
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]]
103
- ; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8
104
- ; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8
59
+ ; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !llvm.access.group [[ACC_GRP4]]
60
+ ; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !llvm.access.group [[ACC_GRP4]]
105
61
; CHECK-NEXT: [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]]
106
62
; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]]
107
63
; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
108
- ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
109
- ; CHECK-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
110
- ; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
111
- ; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
64
+ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
65
+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
66
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
112
67
;
113
68
entry:
114
69
%nface.addr = alloca i32 , align 4
@@ -242,10 +197,10 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: re
242
197
; CHECK: [[ACC_GRP4]] = distinct !{}
243
198
; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
244
199
; CHECK: [[META6]] = !{!"double", [[META2]], i64 0}
245
- ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]}
246
- ; CHECK: [[META8]] = !{!"llvm.loop.unroll.disable "}
247
- ; CHECK: [[LOOP9 ]] = distinct !{[[LOOP9]], [[META10:![0-9]+]] , [[META11:![0-9]+]], [[META12:![0-9]+ ]]}
248
- ; CHECK: [[META10]] = !{!"llvm.loop.mustprogress" }
249
- ; CHECK: [[META11]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]] }
250
- ; CHECK: [[META12 ]] = !{!"llvm.loop.vectorize.enable", i1 true }
200
+ ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]] }
201
+ ; CHECK: [[META8]] = !{!"llvm.loop.mustprogress "}
202
+ ; CHECK: [[META9 ]] = !{!"llvm.loop.parallel_accesses" , [[ACC_GRP4 ]]}
203
+ ; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1 }
204
+ ; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable" }
205
+ ; CHECK: [[LOOP12 ]] = distinct !{[[LOOP12]], [[META8]], [[META9]], [[META11]], [[META10]] }
251
206
;.
0 commit comments