Skip to content

Commit b09b217

Browse files
committed
[Local] Only intersect llvm.access.group metadata if instr moves.
Preserve llvm.access.group metadata on the replacement instruction, if it does not move. In that case, the program would be UB, if the parallel property encoded in the metadata does not hold.
1 parent 61057b0 commit b09b217

File tree

4 files changed

+55
-100
lines changed

4 files changed

+55
-100
lines changed

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3336,8 +3336,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
33363336
K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
33373337
break;
33383338
case LLVMContext::MD_access_group:
3339-
K->setMetadata(LLVMContext::MD_access_group,
3340-
intersectAccessGroups(K, J));
3339+
if (DoesKMove)
3340+
K->setMetadata(LLVMContext::MD_access_group,
3341+
intersectAccessGroups(K, J));
33413342
break;
33423343
case LLVMContext::MD_range:
33433344
if (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef))

llvm/test/Transforms/InstCombine/intersect-accessgroup.ll

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,9 @@
1212
; }
1313
; }
1414
;
15-
; Check for correctly merging access group metadata for instcombine
16-
; (only common loops are parallel == intersection)
17-
; Note that combined load would be parallel to loop !16 since both
18-
; origin loads are parallel to it, but it references two access groups
19-
; (!8 and !9), neither of which contain both loads. As such, the
20-
; information that the combined load is parallel to !16 is lost.
15+
; Check that the original access group on %0 is preserved when replacing uses
16+
; of %1 with it, as %0 is not moved and if %0 would not be parallel in the
17+
; original loop it would be UB.
2118
;
2219
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2320

@@ -107,7 +104,9 @@ for.end32:
107104
; CHECK: load double, {{.*}} !llvm.access.group ![[ACCESSGROUP_0:[0-9]+]]
108105
; CHECK: br label %for.cond14, !llvm.loop ![[LOOP_4:[0-9]+]]
109106

110-
; CHECK: ![[ACCESSGROUP_0]] = distinct !{}
107+
; CHECK: ![[ACCESSGROUP_0]] = !{![[G1:[0-9]+]], ![[G2:[0-9]+]]}
108+
; CHECK: ![[G1]] = distinct !{}
109+
; CHECK: ![[G2]] = distinct !{}
111110

112111
; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[PARALLEL_ACCESSES_5:[0-9]+]]}
113-
; CHECK: ![[PARALLEL_ACCESSES_5]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_0]]}
112+
; CHECK: ![[PARALLEL_ACCESSES_5]] = !{!"llvm.loop.parallel_accesses", ![[G1]]}

llvm/test/Transforms/InstCombine/loadstore-metadata.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ entry:
205205
define double @preserve_load_metadata_after_select_transform2(ptr %a, ptr %b) {
206206
; CHECK-LABEL: @preserve_load_metadata_after_select_transform2(
207207
; CHECK-NEXT: entry:
208-
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
209-
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
208+
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
209+
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
210210
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
211211
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
212212
; CHECK-NEXT: ret double [[L_SEL]]
@@ -224,8 +224,8 @@ entry:
224224
define double @preserve_load_metadata_after_select_transform_metadata_missing_1(ptr %a, ptr %b) {
225225
; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_1(
226226
; CHECK-NEXT: entry:
227-
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
228-
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
227+
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
228+
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
229229
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
230230
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
231231
; CHECK-NEXT: ret double [[L_SEL]]
@@ -242,8 +242,8 @@ entry:
242242
define double @preserve_load_metadata_after_select_transform_metadata_missing_2(ptr %a, ptr %b) {
243243
; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_2(
244244
; CHECK-NEXT: entry:
245-
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
246-
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
245+
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
246+
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
247247
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
248248
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
249249
; CHECK-NEXT: ret double [[L_SEL]]
@@ -261,8 +261,8 @@ entry:
261261
define double @preserve_load_metadata_after_select_transform_metadata_missing_3(ptr %a, ptr %b) {
262262
; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_3(
263263
; CHECK-NEXT: entry:
264-
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
265-
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
264+
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
265+
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
266266
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
267267
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
268268
; CHECK-NEXT: ret double [[L_SEL]]

llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll

Lines changed: 37 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -15,83 +15,39 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
1515
; CHECK: [[FOR_BODY_PREHEADER]]:
1616
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
1717
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[TMP0]]
18-
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 3
1918
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
20-
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]]
21-
; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
22-
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644
19+
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
20+
; CHECK: [[FOR_BODY_PREHEADER14]]:
21+
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER:%.*]], %[[MIDDLE_BLOCK:.*]] ]
2322
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
24-
; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]:
25-
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_3:%.*]], %[[FOR_BODY]] ]
26-
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
27-
; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_EPIL:.*]]
28-
; CHECK: [[FOR_BODY_EPIL]]:
29-
; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
30-
; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], %[[FOR_BODY_EPIL]] ], [ 0, %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
23+
; CHECK: [[VECTOR_PH]]:
24+
; CHECK-NEXT: [[UNROLL_ITER]] = and i64 [[TMP0]], 2147483644
25+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
26+
; CHECK: [[VECTOR_BODY]]:
27+
; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
3128
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
32-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
29+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
3330
; CHECK-NEXT: [[GEP_EPIL:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
34-
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_EPIL]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
35-
; CHECK-NEXT: [[IDXPROM3_EPIL:%.*]] = sext i32 [[TMP2]] to i64
36-
; CHECK-NEXT: [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_EPIL]]
37-
; CHECK-NEXT: [[IDXPROM5_EPIL:%.*]] = sext i32 [[TMP3]] to i64
38-
; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_EPIL]]
39-
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX4_EPIL]], align 8
40-
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX6_EPIL]], align 8
41-
; CHECK-NEXT: [[CMP_I_EPIL:%.*]] = fcmp fast olt double [[TMP4]], [[TMP5]]
42-
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[CMP_I_EPIL]], double [[TMP5]], double [[TMP4]]
43-
; CHECK-NEXT: store double [[TMP6]], ptr [[ARRAYIDX4_EPIL]], align 8, !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
44-
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
45-
; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
46-
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
47-
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_EPIL]], !llvm.loop [[LOOP7:![0-9]+]]
31+
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[GEP_EPIL]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
32+
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
33+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
34+
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64>
35+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> [[TMP5]]
36+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !llvm.access.group [[ACC_GRP4]]
37+
; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !llvm.access.group [[ACC_GRP4]]
38+
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
39+
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
40+
; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
41+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
42+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
43+
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
44+
; CHECK: [[MIDDLE_BLOCK]]:
45+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]]
46+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
4847
; CHECK: [[FOR_COND_CLEANUP]]:
4948
; CHECK-NEXT: ret void
5049
; CHECK: [[FOR_BODY]]:
51-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3]], %[[FOR_BODY]] ]
52-
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_BODY]] ]
53-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV]]
54-
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
55-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]]
56-
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
57-
; CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP7]] to i64
58-
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3]]
59-
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP8]] to i64
60-
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5]]
61-
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
62-
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[ARRAYIDX6]], align 8
63-
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[TMP9]], [[TMP10]]
64-
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[CMP_I]], double [[TMP10]], double [[TMP9]]
65-
; CHECK-NEXT: store double [[TMP11]], ptr [[ARRAYIDX4]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
66-
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
67-
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT]]
68-
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
69-
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT]]
70-
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[GEP_1]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
71-
; CHECK-NEXT: [[IDXPROM3_1:%.*]] = sext i32 [[TMP12]] to i64
72-
; CHECK-NEXT: [[ARRAYIDX4_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_1]]
73-
; CHECK-NEXT: [[IDXPROM5_1:%.*]] = sext i32 [[TMP13]] to i64
74-
; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_1]]
75-
; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX4_1]], align 8
76-
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX6_1]], align 8
77-
; CHECK-NEXT: [[CMP_I_1:%.*]] = fcmp fast olt double [[TMP14]], [[TMP15]]
78-
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[CMP_I_1]], double [[TMP15]], double [[TMP14]]
79-
; CHECK-NEXT: store double [[TMP16]], ptr [[ARRAYIDX4_1]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
80-
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
81-
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_1]]
82-
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
83-
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_1]]
84-
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[GEP_2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
85-
; CHECK-NEXT: [[IDXPROM3_2:%.*]] = sext i32 [[TMP17]] to i64
86-
; CHECK-NEXT: [[ARRAYIDX4_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_2]]
87-
; CHECK-NEXT: [[IDXPROM5_2:%.*]] = sext i32 [[TMP18]] to i64
88-
; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_2]]
89-
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX4_2]], align 8
90-
; CHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[ARRAYIDX6_2]], align 8
91-
; CHECK-NEXT: [[CMP_I_2:%.*]] = fcmp fast olt double [[TMP19]], [[TMP20]]
92-
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[CMP_I_2]], double [[TMP20]], double [[TMP19]]
93-
; CHECK-NEXT: store double [[TMP21]], ptr [[ARRAYIDX4_2]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
94-
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
50+
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ]
9551
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
9652
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
9753
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
@@ -100,15 +56,14 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
10056
; CHECK-NEXT: [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
10157
; CHECK-NEXT: [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64
10258
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]]
103-
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8
104-
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8
59+
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !llvm.access.group [[ACC_GRP4]]
60+
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !llvm.access.group [[ACC_GRP4]]
10561
; CHECK-NEXT: [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]]
10662
; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]]
10763
; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
108-
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
109-
; CHECK-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
110-
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
111-
; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
64+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
65+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
66+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
11267
;
11368
entry:
11469
%nface.addr = alloca i32, align 4
@@ -242,10 +197,10 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: re
242197
; CHECK: [[ACC_GRP4]] = distinct !{}
243198
; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
244199
; CHECK: [[META6]] = !{!"double", [[META2]], i64 0}
245-
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]}
246-
; CHECK: [[META8]] = !{!"llvm.loop.unroll.disable"}
247-
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]], [[META12:![0-9]+]]}
248-
; CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
249-
; CHECK: [[META11]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]}
250-
; CHECK: [[META12]] = !{!"llvm.loop.vectorize.enable", i1 true}
200+
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
201+
; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"}
202+
; CHECK: [[META9]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]}
203+
; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
204+
; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"}
205+
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META8]], [[META9]], [[META11]], [[META10]]}
251206
;.

0 commit comments

Comments
 (0)