Commit 0d4c426

Fix OpenMP & Tape Error (rust-lang#254)
* Fix tape erasure bug
* Fix OpenMP leak
1 parent e0a2944 commit 0d4c426

File tree

5 files changed: +484 -15 lines


enzyme/Enzyme/AdjointGenerator.h

Lines changed: 69 additions & 0 deletions
@@ -3072,6 +3072,75 @@ class AdjointGenerator
         nextTypeInfo, uncacheable_args, subdata, /*AtomicAdd*/ true,
         /*postopt*/ false, /*omp*/ true);
 
+    if (subdata->returns.find(AugmentedStruct::Tape) !=
+        subdata->returns.end()) {
+      auto tapeArg = newcalled->arg_end();
+      tapeArg--;
+      LoadInst *tape = nullptr;
+      for (auto u : tapeArg->users()) {
+        assert(!tape);
+        tape = cast<LoadInst>(u);
+      }
+      assert(tape);
+      std::vector<Value *> extracts;
+      if (subdata->tapeIndices.size() == 1) {
+        assert(subdata->tapeIndices.begin()->second == -1);
+        extracts.push_back(tape);
+      } else {
+        for (auto a : tape->users()) {
+          extracts.push_back(a);
+        }
+      }
+      std::vector<LoadInst *> geps;
+      for (auto E : extracts) {
+        AllocaInst *AI = nullptr;
+        for (auto U : E->users()) {
+          if (auto SI = dyn_cast<StoreInst>(U)) {
+            assert(SI->getValueOperand() == E);
+            AI = cast<AllocaInst>(SI->getPointerOperand());
+          }
+        }
+        if (AI) {
+          for (auto U : AI->users()) {
+            if (auto LI = dyn_cast<LoadInst>(U)) {
+              geps.push_back(LI);
+            }
+          }
+        }
+      }
+      size_t freeCount = 0;
+      for (auto LI : geps) {
+        CallInst *freeCall = nullptr;
+        for (auto LU : LI->users()) {
+          if (auto CI = dyn_cast<CallInst>(LU)) {
+            if (auto F = CI->getCalledFunction()) {
+              if (F->getName() == "free") {
+                freeCall = CI;
+                break;
+              }
+            }
+          } else if (auto BC = dyn_cast<CastInst>(LU)) {
+            for (auto CU : BC->users()) {
+              if (auto CI = dyn_cast<CallInst>(CU)) {
+                if (auto F = CI->getCalledFunction()) {
+                  if (F->getName() == "free") {
+                    freeCall = CI;
+                    break;
+                  }
+                }
+              }
+            }
+            if (freeCall)
+              break;
+          }
+        }
+        if (freeCall) {
+          freeCall->eraseFromParent();
+          freeCount++;
+        }
+      }
+    }
+
     Value *OutAlloc = nullptr;
     if (OutTypes.size()) {
       auto ST = StructType::get(newcalled->getContext(), OutFPTypes);
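
The added block finds the augmented OpenMP function's trailing tape argument, follows its single load, the extracts taken from that load, the allocas they are spilled into, and the reloads from those allocas, then erases any call to free reached from a reload (directly or through a bitcast), so the tape is not freed inside the outlined region. A minimal standalone sketch of the user-walk at the heart of this, assuming the LLVM C++ API (findFreeCall is an illustrative name, not Enzyme's):

```cpp
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Return a call to free(V) found among V's users, looking through a
// bitcast if needed (free takes an i8*, so the pointer is usually cast
// first). Returns nullptr if no such call exists.
static CallInst *findFreeCall(Value *V) {
  for (User *U : V->users()) {
    if (auto *CI = dyn_cast<CallInst>(U)) {
      if (Function *F = CI->getCalledFunction())
        if (F->getName() == "free")
          return CI;
    } else if (auto *BC = dyn_cast<CastInst>(U)) {
      if (CallInst *Inner = findFreeCall(BC))
        return Inner;
    }
  }
  return nullptr;
}
```

Enzyme's version additionally counts the erased calls (freeCount) and asserts structural invariants at each step of the walk.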

enzyme/Enzyme/CacheUtility.cpp

Lines changed: 10 additions & 13 deletions
@@ -712,7 +712,6 @@ AllocaInst *CacheUtility::createCacheForScope(LimitContext ctx, Type *T,
       newFunc->getParent()->getDataLayout().getTypeAllocSizeInBits(myType) /
       8);
 
-  // if (i != sublimits.size() -1 || !ompOffset)
   // Allocate and store the required memory
   if (allocateInternal) {

@@ -849,19 +848,17 @@ AllocaInst *CacheUtility::createCacheForScope(LimitContext ctx, Type *T,
   }
 
   // Free the memory, if requested
-  if ((unsigned)i != sublimits.size() - 1 || !ompOffset)
-    if (shouldFree) {
-      if (CachePointerInvariantGroups.find(std::make_pair(
-              (Value *)alloc, i)) == CachePointerInvariantGroups.end()) {
-        MDNode *invgroup = MDNode::getDistinct(alloc->getContext(), {});
-        CachePointerInvariantGroups[std::make_pair((Value *)alloc, i)] =
-            invgroup;
-      }
-      freeCache(
-          containedloops.back().first.preheader, sublimits, i, alloc,
-          byteSizeOfType, storeInto,
-          CachePointerInvariantGroups[std::make_pair((Value *)alloc, i)]);
+  if (shouldFree) {
+    if (CachePointerInvariantGroups.find(std::make_pair((Value *)alloc, i)) ==
+        CachePointerInvariantGroups.end()) {
+      MDNode *invgroup = MDNode::getDistinct(alloc->getContext(), {});
+      CachePointerInvariantGroups[std::make_pair((Value *)alloc, i)] =
+          invgroup;
     }
+    freeCache(containedloops.back().first.preheader, sublimits, i, alloc,
+              byteSizeOfType, storeInto,
+              CachePointerInvariantGroups[std::make_pair((Value *)alloc, i)]);
+  }
 
   // If we are not the final iteration, lookup the next pointer by indexing
   // into the relevant location of the current chunk allocation
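
The second hunk drops the `(unsigned)i != sublimits.size() - 1 || !ompOffset` guard (whose commented-out twin is also deleted in the first hunk), so the cache allocation is now freed whenever shouldFree holds, after lazily creating a distinct invariant group for the cache pointer. A minimal sketch of that invariant-group idiom, assuming the LLVM C++ API (markInvariantGroup is an illustrative name, not Enzyme's):

```cpp
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Attach a fresh, distinct invariant group to a load or store, promising
// that all accesses tagged with the same group observe the same value.
static MDNode *markInvariantGroup(Instruction *MemOp) {
  MDNode *Group = MDNode::getDistinct(MemOp->getContext(), {});
  MemOp->setMetadata(LLVMContext::MD_invariant_group, Group);
  return Group;
}
```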

enzyme/Enzyme/GradientUtils.cpp

Lines changed: 5 additions & 2 deletions
@@ -1447,8 +1447,8 @@ Value *GradientUtils::cacheForReverse(IRBuilder<> &BuilderQ, Value *malloc,
           BuilderQ.CreateInBoundsGEP(ret, ArrayRef<Value *>(tid)));
     }
   } else {
-    if (auto ri = dyn_cast<Instruction>(ret))
-      erase(ri);
+    if (idx >= 0)
+      erase(cast<Instruction>(ret));
     IRBuilder<> entryBuilder(inversionAllocs);
     entryBuilder.setFastMathFlags(getFast());
     ret = (idx < 0) ? tape

@@ -1509,6 +1509,9 @@
   assert(malloc);
   bool isi1 = !ignoreType && malloc->getType()->isIntegerTy() &&
               cast<IntegerType>(malloc->getType())->getBitWidth() == 1;
+  assert(isa<PointerType>(cache->getType()));
+  assert(cast<PointerType>(cache->getType())->getElementType() ==
+         ret->getType());
   entryBuilder.CreateStore(ret, cache);
 
   auto v = lookupValueFromCache(/*forwardPass*/ true, BuilderQ, lctx, cache,
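
In the first hunk, ret is erased only when idx >= 0; when idx < 0 the tape itself becomes ret on the following line, so the old unconditional erase could destroy the live tape value, which appears to be the tape-erasure bug named in the commit message. The second hunk adds asserts that the cache slot's pointee type matches the value stored into it. A minimal sketch of such a type-checked cache store, assuming typed (pre-opaque-pointer) LLVM pointers (storeChecked is an illustrative name, not Enzyme's):

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include <cassert>

using namespace llvm;

// Store V into the cache slot Slot, first checking (under typed pointers)
// that the slot's element type matches the type of the value being cached.
static StoreInst *storeChecked(IRBuilder<> &B, Value *V, Value *Slot) {
  assert(isa<PointerType>(Slot->getType()));
  assert(cast<PointerType>(Slot->getType())->getElementType() ==
         V->getType());
  return B.CreateStore(V, Slot);
}
```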
new file

Lines changed: 263 additions & 0 deletions
@@ -0,0 +1,263 @@
+; RUN: if [ %llvmver -ge 9 ]; then %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -instsimplify -adce -loop-deletion -correlated-propagation -simplifycfg -S | FileCheck %s; fi
+
+source_filename = "lulesh.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+
+; Function Attrs: norecurse nounwind uwtable mustprogress
+define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 {
+entry:
+  %data = alloca [100 x double], align 16
+  %d_data = alloca [100 x double], align 16
+  %0 = bitcast [100 x double]* %data to i8*
+  %1 = bitcast [100 x double]* %d_data to i8*
+  call void @_Z17__enzyme_autodiffPvS_S_m(i8* bitcast (void (double*, i64)* @_ZL16LagrangeLeapFrogPdm to i8*), i8* nonnull %0, i8* nonnull %1, i64 100) #5
+  ret i32 0
+}
+
+declare dso_local void @_Z17__enzyme_autodiffPvS_S_m(i8*, i8*, i8*, i64) local_unnamed_addr #2
+
+; Function Attrs: inlinehint nounwind uwtable mustprogress
+define internal void @_ZL16LagrangeLeapFrogPdm(double* %e_new, i64 %length) #3 {
+entry:
+  tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @2, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, double*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 %length, double* %e_new)
+  ret void
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define internal void @.omp_outlined.(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i64 %length, double* nocapture nonnull align 8 dereferenceable(8) %tmp) #4 {
+entry:
+  %.omp.lb = alloca i64, align 8
+  %.omp.ub = alloca i64, align 8
+  %.omp.stride = alloca i64, align 8
+  %.omp.is_last = alloca i32, align 4
+  %sub4 = add i64 %length, -1
+  %cmp.not = icmp eq i64 %length, 0
+  br i1 %cmp.not, label %omp.precond.end, label %omp.precond.then
+
+omp.precond.then:                                 ; preds = %entry
+  %0 = bitcast i64* %.omp.lb to i8*
+  store i64 0, i64* %.omp.lb, align 8, !tbaa !3
+  %1 = bitcast i64* %.omp.ub to i8*
+  store i64 %sub4, i64* %.omp.ub, align 8, !tbaa !3
+  %2 = bitcast i64* %.omp.stride to i8*
+  store i64 1, i64* %.omp.stride, align 8, !tbaa !3
+  %3 = bitcast i32* %.omp.is_last to i8*
+  store i32 0, i32* %.omp.is_last, align 4, !tbaa !7
+  %4 = load i32, i32* %.global_tid., align 4, !tbaa !7
+  call void @__kmpc_for_static_init_8u(%struct.ident_t* nonnull @1, i32 %4, i32 34, i32* nonnull %.omp.is_last, i64* nonnull %.omp.lb, i64* nonnull %.omp.ub, i64* nonnull %.omp.stride, i64 1, i64 1)
+  %5 = load i64, i64* %.omp.ub, align 8, !tbaa !3
+  %cmp6 = icmp ugt i64 %5, %sub4
+  %cond = select i1 %cmp6, i64 %sub4, i64 %5
+  store i64 %cond, i64* %.omp.ub, align 8, !tbaa !3
+  %6 = load i64, i64* %.omp.lb, align 8, !tbaa !3
+  %add29 = add i64 %cond, 1
+  %cmp730 = icmp ult i64 %6, %add29
+  br i1 %cmp730, label %omp.inner.for.body, label %omp.loop.exit
+
+omp.inner.for.body:                               ; preds = %omp.precond.then, %omp.inner.for.body
+  %.omp.iv.031 = phi i64 [ %add11, %omp.inner.for.body ], [ %6, %omp.precond.then ]
+  %arrayidx = getelementptr inbounds double, double* %tmp, i64 %.omp.iv.031
+  %7 = load double, double* %arrayidx, align 8, !tbaa !9
+  %call = call double @sqrt(double %7) #5
+  store double %call, double* %arrayidx, align 8, !tbaa !9
+  %add11 = add nuw i64 %.omp.iv.031, 1
+  %8 = load i64, i64* %.omp.ub, align 8, !tbaa !3
+  %add = add i64 %8, 1
+  %cmp7 = icmp ult i64 %add11, %add
+  br i1 %cmp7, label %omp.inner.for.body, label %omp.loop.exit
+
+omp.loop.exit:                                    ; preds = %omp.inner.for.body, %omp.precond.then
+  call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @1, i32 %4)
+  br label %omp.precond.end
+
+omp.precond.end:                                  ; preds = %omp.loop.exit, %entry
+  ret void
+}
+
+; Function Attrs: nounwind
+declare dso_local void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) local_unnamed_addr #5
+
+; Function Attrs: nofree nounwind willreturn mustprogress
+declare dso_local double @sqrt(double) local_unnamed_addr #6
+
+; Function Attrs: nounwind
+declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) local_unnamed_addr #5
+
+; Function Attrs: nounwind
+declare !callback !11 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr #5
+
+attributes #0 = { norecurse nounwind uwtable }
+attributes #1 = { argmemonly }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+!nvvm.annotations = !{}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{!"clang version 13.0.0 ([email protected]:llvm/llvm-project 619bfe8bd23f76b22f0a53fedafbfc8c97a15f12)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"long", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"int", !5, i64 0}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"double", !5, i64 0}
+!11 = !{!12}
+!12 = !{i64 2, i64 -1, i64 -1, i1 true}
+
+
+; CHECK: define internal void @augmented_.omp_outlined..1(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i64 %length, double* nocapture nonnull align 8 dereferenceable(8) %tmp, double* nocapture %"tmp'", double** %tape)
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %0 = load double*, double** %tape
+; CHECK-NEXT:   %.omp.lb_smpl = alloca i64
+; CHECK-NEXT:   %.omp.ub_smpl = alloca i64
+; CHECK-NEXT:   %.omp.stride_smpl = alloca i64
+; CHECK-NEXT:   %.omp.is_last = alloca i32, align 4
+; CHECK-NEXT:   %sub4 = add i64 %length, -1
+; CHECK-NEXT:   %cmp.not = icmp eq i64 %length, 0
+; CHECK-NEXT:   br i1 %cmp.not, label %omp.precond.end, label %omp.precond.then
+
+; CHECK: omp.precond.then:                                 ; preds = %entry
+; CHECK-NEXT:   store i32 0, i32* %.omp.is_last, align 4, !tbaa !7
+; CHECK-NEXT:   %1 = load i32, i32* %.global_tid., align 4, !tbaa !7
+; CHECK-NEXT:   store i64 0, i64* %.omp.lb_smpl
+; CHECK-NEXT:   store i64 %sub4, i64* %.omp.ub_smpl
+; CHECK-NEXT:   store i64 1, i64* %.omp.stride_smpl
+; CHECK-NEXT:   call void @__kmpc_for_static_init_8u(%struct.ident_t* nonnull @1, i32 %1, i32 34, i32* nonnull %.omp.is_last, i64* nocapture nonnull %.omp.lb_smpl, i64* nocapture nonnull %.omp.ub_smpl, i64* nocapture nonnull %.omp.stride_smpl, i64 1, i64 1)
+; CHECK-NEXT:   %2 = load i64, i64* %.omp.lb_smpl
+; CHECK-NEXT:   %3 = load i64, i64* %.omp.ub_smpl
+; CHECK-NEXT:   %4 = load i64, i64* %.omp.lb_smpl
+; CHECK-NEXT:   %cmp6 = icmp ugt i64 %3, %sub4
+; CHECK-NEXT:   %cond = select i1 %cmp6, i64 %sub4, i64 %3
+; CHECK-NEXT:   %add29 = add i64 %cond, 1
+; CHECK-NEXT:   %cmp730 = icmp ult i64 %4, %add29
+; CHECK-NEXT:   br i1 %cmp730, label %omp.inner.for.body, label %omp.loop.exit
+
+; CHECK: omp.inner.for.body:                               ; preds = %omp.precond.then, %omp.inner.for.body
+; CHECK-NEXT:   %iv = phi i64 [ %iv.next, %omp.inner.for.body ], [ 0, %omp.precond.then ]
+; CHECK-NEXT:   %iv.next = add nuw nsw i64 %iv, 1
+; CHECK-NEXT:   %5 = add i64
+; %4, %iv
+; CHECK-NEXT:   %arrayidx = getelementptr inbounds double, double* %tmp, i64 %5
+; CHECK-NEXT:   %6 = load double, double* %arrayidx, align 8, !tbaa !9
+; CHECK-NEXT:   %call = call double @sqrt(double %6)
+; CHECK-NEXT:   store double %call, double* %arrayidx, align 8, !tbaa !9
+; CHECK-NEXT:   %7 = add nuw nsw i64 %iv, %2
+; CHECK-NEXT:   %8 = getelementptr inbounds double, double* %0, i64 %7
+; CHECK-NEXT:   store double %6, double* %8, align 8, !invariant.group !13
+; CHECK-NEXT:   %add11 = add nuw i64 %5, 1
+; CHECK-NEXT:   %add = add nuw i64 %cond, 1
+; CHECK-NEXT:   %cmp7 = icmp ult i64 %add11, %add
+; CHECK-NEXT:   br i1 %cmp7, label %omp.inner.for.body, label %omp.loop.exit
+
+; CHECK: omp.loop.exit:                                    ; preds = %omp.inner.for.body, %omp.precond.then
+; CHECK-NEXT:   call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @1, i32 %1)
+; CHECK-NEXT:   br label %omp.precond.end
+
+; CHECK: omp.precond.end:                                  ; preds = %omp.loop.exit, %entry
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+; CHECK: define internal void @diffe.omp_outlined.(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i64 %length, double* nocapture nonnull align 8 dereferenceable(8) %tmp, double* nocapture %"tmp'", double** %tapeArg)
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %truetape = load double*, double** %tapeArg
+; CHECK-NEXT:   %.omp.lb_smpl = alloca i64
+; CHECK-NEXT:   %.omp.ub_smpl = alloca i64
+; CHECK-NEXT:   %.omp.stride_smpl = alloca i64
+; CHECK-NEXT:   %.omp.is_last = alloca i32, align 4
+; CHECK-NEXT:   %sub4 = add i64 %length, -1
+; CHECK-NEXT:   %cmp.not = icmp eq i64 %length, 0
+; CHECK-NEXT:   br i1 %cmp.not, label %invertentry, label %omp.precond.then
+
+; CHECK: omp.precond.then:                                 ; preds = %entry
+; CHECK-NEXT:   store i32 0, i32* %.omp.is_last, align 4, !tbaa !7
+; CHECK-NEXT:   %0 = load i32, i32* %.global_tid., align 4, !tbaa !7, !invariant.group !15
+; CHECK-NEXT:   store i64 0, i64* %.omp.lb_smpl
+; CHECK-NEXT:   store i64 %sub4, i64* %.omp.ub_smpl
+; CHECK-NEXT:   store i64 1, i64* %.omp.stride_smpl
+; CHECK-NEXT:   call void @__kmpc_for_static_init_8u(%struct.ident_t* nonnull @1, i32 %0, i32 34, i32* nonnull %.omp.is_last, i64* nocapture nonnull %.omp.lb_smpl, i64* nocapture nonnull %.omp.ub_smpl, i64* nocapture nonnull %.omp.stride_smpl, i64 1, i64 1)
+; CHECK-NEXT:   %1 = load i64, i64* %.omp.lb_smpl
+; CHECK-NEXT:   %2 = load i64, i64* %.omp.ub_smpl
+; CHECK-NEXT:   %3 = load i64, i64* %.omp.lb_smpl
+; CHECK-NEXT:   %cmp6 = icmp ugt i64 %2, %sub4
+; CHECK-NEXT:   %cond = select i1 %cmp6, i64 %sub4, i64 %2
+; CHECK-NEXT:   %add29 = add i64 %cond, 1
+; CHECK-NEXT:   %cmp730 = icmp ult i64 %3, %add29
+; CHECK-NEXT:   br i1 %cmp730, label %omp.inner.for.body, label %invertomp.precond.end
+
+; CHECK: omp.inner.for.body:                               ; preds = %omp.precond.then, %omp.inner.for.body
+; CHECK-NEXT:   %iv = phi i64 [ %iv.next, %omp.inner.for.body ], [ 0, %omp.precond.then ]
+; CHECK-NEXT:   %iv.next = add nuw nsw i64 %iv, 1
+; CHECK-NEXT:   %4 = add i64
+; %3, %iv
+; CHECK-NEXT:   %5 = add nuw nsw i64 %iv, %1
+; CHECK-NEXT:   %6 = getelementptr inbounds double, double* %truetape, i64 %5
+; CHECK-NEXT:   %7 = load double, double* %6, align 8, !invariant.group !19
+; CHECK-NEXT:   %call = call double @sqrt(double %7)
+; CHECK-NEXT:   %add11 = add nuw i64 %4, 1
+; CHECK-NEXT:   %add = add nuw i64 %cond, 1
+; CHECK-NEXT:   %cmp7 = icmp ult i64 %add11, %add
+; CHECK-NEXT:   br i1 %cmp7, label %omp.inner.for.body, label %invertomp.precond.end
+
+; CHECK: invertentry:                                      ; preds = %entry, %invertomp.precond.end, %invertomp.precond.then
+; CHECK-NEXT:   ret void
+
+; CHECK: invertomp.precond.then:                           ; preds = %invertomp.inner.for.body, %invertomp.loop.exit
+; CHECK-NEXT:   %_unwrap = load i32, i32* %.global_tid., align 4, !tbaa !7, !invariant.group !15
+; CHECK-NEXT:   call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %_unwrap)
+; CHECK-NEXT:   br label %invertentry
+
+; CHECK: invertomp.inner.for.body:                         ; preds = %invertomp.loop.exit.loopexit, %incinvertomp.inner.for.body
+; CHECK-NEXT:   %"iv'ac.0" = phi i64 [ %_unwrap9, %invertomp.loop.exit.loopexit ], [ %19, %incinvertomp.inner.for.body ]
+; CHECK-NEXT:   %_unwrap2 = load i64, i64* %.omp.lb_smpl
+; CHECK-NEXT:   %_unwrap3 = add i64 %_unwrap2, %"iv'ac.0"
+; CHECK-NEXT:   %"arrayidx'ipg_unwrap" = getelementptr inbounds double, double* %"tmp'", i64 %_unwrap3
+; CHECK-NEXT:   %8 = load double, double* %"arrayidx'ipg_unwrap", align 8
+; CHECK-NEXT:   store double 0.000000e+00, double* %"arrayidx'ipg_unwrap", align 8
+; CHECK-NEXT:   %_unwrap5 = load i64, i64* %.omp.lb_smpl
+; CHECK-NEXT:   %9 = add nuw nsw i64 %"iv'ac.0", %_unwrap5
+; CHECK-NEXT:   %10 = getelementptr inbounds double, double* %truetape, i64 %9
+; CHECK-NEXT:   %11 = load double, double* %10, align 8, !invariant.group !20
+; CHECK-NEXT:   %12 = call fast double @llvm.sqrt.f64(double %11)
+; CHECK-NEXT:   %13 = fmul fast double 5.000000e-01, %8
+; CHECK-NEXT:   %14 = fdiv fast double %13, %12
+; CHECK-NEXT:   %15 = fcmp fast oeq double %11, 0.000000e+00
+; CHECK-NEXT:   %16 = select fast i1 %15, double 0.000000e+00, double %14
+; CHECK-NEXT:   %17 = atomicrmw fadd double* %"arrayidx'ipg_unwrap", double %16 monotonic
+; CHECK-NEXT:   %18 = icmp eq i64 %"iv'ac.0", 0
+; CHECK-NEXT:   br i1 %18, label %invertomp.precond.then, label %incinvertomp.inner.for.body
+
+; CHECK: incinvertomp.inner.for.body:                      ; preds = %invertomp.inner.for.body
+; CHECK-NEXT:   %19 = add nsw i64 %"iv'ac.0", -1
+; CHECK-NEXT:   br label %invertomp.inner.for.body
+
+; CHECK: invertomp.loop.exit.loopexit:                     ; preds = %invertomp.loop.exit
+; CHECK-NEXT:   %_unwrap7 = load i64, i64* %.omp.ub_smpl
+; CHECK-NEXT:   %cmp6_unwrap = icmp ugt i64 %_unwrap7, %sub4
+; CHECK-NEXT:   %cond_unwrap = select i1 %cmp6_unwrap, i64 %sub4, i64 %_unwrap7
+; CHECK-NEXT:   %_unwrap8 = load i64, i64* %.omp.lb_smpl
+; CHECK-NEXT:   %_unwrap9 = sub i64 %cond_unwrap, %_unwrap8
+; CHECK-NEXT:   br label %invertomp.inner.for.body
+
+; CHECK: invertomp.loop.exit:                              ; preds = %invertomp.precond.end
+; CHECK-NEXT:   %_unwrap10 = load i64, i64* %.omp.lb_smpl
+; CHECK-NEXT:   %_unwrap11 = load i64, i64* %.omp.ub_smpl
+; CHECK-NEXT:   %cmp6_unwrap12 = icmp ugt i64 %_unwrap11, %sub4
+; CHECK-NEXT:   %cond_unwrap13 = select i1 %cmp6_unwrap12, i64 %sub4, i64 %_unwrap11
+; CHECK-NEXT:   %add29_unwrap = add i64 %cond_unwrap13, 1
+; CHECK-NEXT:   %cmp730_unwrap = icmp ult i64 %_unwrap10, %add29_unwrap
+; CHECK-NEXT:   br i1 %cmp730_unwrap, label %invertomp.loop.exit.loopexit, label %invertomp.precond.then
+
+; CHECK: invertomp.precond.end:                            ; preds = %omp.inner.for.body, %omp.precond.then
+; CHECK-NEXT:   br i1 %cmp.not, label %invertentry, label %invertomp.loop.exit
+; CHECK-NEXT: }
+
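
For orientation, the test's IR is roughly what Clang emits for OpenMP C++ source of the following shape. This is a hand reconstruction from the IR above (names, the __enzyme_autodiff signature, and the elided initialization are inferred from the mangled symbols), not the literal lulesh.cc source:

```cpp
#include <cmath>
#include <cstddef>

// Signature inferred from the declaration
// @_Z17__enzyme_autodiffPvS_S_m(i8*, i8*, i8*, i64); on the test's
// x86_64-linux target, std::size_t mangles as 'm' (unsigned long).
extern void __enzyme_autodiff(void *, void *, void *, std::size_t);

static void LagrangeLeapFrog(double *e_new, std::size_t length) {
#pragma omp parallel for
  for (std::size_t i = 0; i < length; i++)
    e_new[i] = std::sqrt(e_new[i]);
}

int main(int argc, char **argv) {
  double data[100];   // initialization elided, as in the IR above
  double d_data[100];
  // Differentiate LagrangeLeapFrog, accumulating the adjoint of data
  // into d_data; mirrors the call emitted in @main above.
  __enzyme_autodiff((void *)LagrangeLeapFrog, data, d_data, 100);
  return 0;
}
```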
