Skip to content

Commit 7d16f40

Browse files
authored
Add inverted load test (rust-lang#252)
1 parent 0d4c426 commit 7d16f40

File tree

1 file changed

+97
-0
lines changed

1 file changed

+97
-0
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -instsimplify -adce -loop-deletion -correlated-propagation -simplifycfg -S | FileCheck %s
2+
3+
declare i8* @malloc(i64)
4+
declare void @free(i8*)
5+
6+
; Function Attrs: norecurse nounwind readonly uwtable
7+
; subsum: sums doubles gathered from %x through a doubly-indirect index.
;   %off - array of pointers to i64; entry iv is loaded, then dereferenced to obtain an index
;   %x   - array of doubles, indexed by the value loaded through %off[iv]
;   %n   - loop bound; the body runs for iv = 0..n inclusive (see the exit test below)
; Returns the accumulated sum (%add from the final iteration).
define dso_local double @subsum(i64** %off, double* nocapture readonly %x, i64 %n) #0 {
8+
entry:
9+
br label %for.body
10+
11+
for.cond.cleanup: ; preds = %for.body
12+
ret double %add
13+
14+
for.body: ; preds = %entry, %for.body
15+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
16+
%total.07 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
17+
; Two dependent loads: first the i64* stored at off[iv], then the i64 it points to.
%pidx = getelementptr inbounds i64*, i64** %off, i64 %indvars.iv
18+
%qidx = load i64*, i64** %pidx, align 8
19+
%idx = load i64, i64* %qidx, align 8
20+
; The loaded integer selects which element of %x contributes to the sum.
%arrayidx = getelementptr inbounds double, double* %x, i64 %idx
21+
%0 = load double, double* %arrayidx, align 8
22+
%add = fadd fast double %0, %total.07
23+
%indvars.iv.next = add nuw i64 %indvars.iv, 1
24+
; The exit test compares the pre-increment value with %n, so the body executes n+1 times.
%exitcond = icmp eq i64 %indvars.iv, %n
25+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
26+
}
27+
28+
; sum: calls @subsum with the same arguments, then overwrites the first element
; of %x with 0.0 and the first element of %off with null before returning
; @subsum's result.
; NOTE(review): presumably these stores exist so the index values read inside
; @subsum cannot be re-loaded after the call and must instead be cached for the
; reverse pass - confirm against the Enzyme change this test was added for.
; NOTE(review): this function uses attribute group #0 (which includes
; `readonly`) and marks %x as `readonly`, yet the body stores through both %x
; and %off - confirm this mismatch is intentional for the test.
define dso_local double @sum(i64** %off, double* nocapture readonly %x, i64 %n) #0 {
29+
entry:
30+
%res = call double @subsum(i64** %off, double* %x, i64 %n)
31+
store double 0.000000e+00, double* %x
32+
store i64* null, i64** %off
33+
ret double %res
34+
}
35+
36+
; Function Attrs: nounwind uwtable
37+
; dsum: test driver that asks Enzyme to differentiate @sum.
; Passes @sum to @__enzyme_autodiff followed by the "enzyme_dup" metadata
; marker, the pair (%off, %doff), the pair (%x, %xp), and the bound %n.
; The double returned by the call is unused; the function returns void.
; NOTE(review): presumably %doff and %xp are the shadow (derivative) buffers
; for %off and %x respectively - confirm against Enzyme's calling convention.
define dso_local void @dsum(i64** %off, i64** %doff, double* %x, double* %xp, i64 %n) local_unnamed_addr #1 {
38+
entry:
39+
%0 = tail call double (double (i64**, double*, i64)*, ...) @__enzyme_autodiff(double (i64**, double*, i64)* nonnull @sum, metadata !"enzyme_dup", i64** %off, i64** %doff, double* %x, double* %xp, i64 %n)
40+
ret void
41+
}
42+
43+
; Function Attrs: nounwind
44+
declare double @__enzyme_autodiff(double (i64**, double*, i64)*, ...) #2
45+
46+
attributes #0 = { norecurse nounwind readonly uwtable }
47+
attributes #1 = { nounwind uwtable }
48+
attributes #2 = { nounwind }
49+
50+
; Expected augmented forward pass for @subsum.
; It allocates (n + 1) * 8 bytes with malloc, views the buffer as i64*, stores
; the %idx value loaded on each iteration into slot %iv, and returns the buffer
; pointer. The loop keeps the original exit test (iv == n), which matches the
; n + 1 element allocation.
; CHECK: define internal i64* @augmented_subsum(i64** %off, i64** %"off'", double* nocapture readonly %x, double* nocapture %"x'", i64 %n)
51+
; CHECK-NEXT: entry:
52+
; CHECK-NEXT: %0 = add nuw i64 %n, 1
53+
; CHECK-NEXT: %mallocsize = mul nuw nsw i64 %0, 8
54+
; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i64 %mallocsize)
55+
; CHECK-NEXT: %idx_malloccache = bitcast i8* %malloccall to i64*
56+
; CHECK-NEXT: br label %for.body
57+
58+
; CHECK: for.cond.cleanup: ; preds = %for.body
59+
; CHECK-NEXT: ret i64* %idx_malloccache
60+
61+
; CHECK: for.body: ; preds = %for.body, %entry
62+
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
63+
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
64+
; CHECK-NEXT: %pidx = getelementptr inbounds i64*, i64** %off, i64 %iv
65+
; CHECK-NEXT: %qidx = load i64*, i64** %pidx, align 8
66+
; CHECK-NEXT: %idx = load i64, i64* %qidx, align 8
67+
; CHECK-NEXT: %1 = getelementptr inbounds i64, i64* %idx_malloccache, i64 %iv
68+
; CHECK-NEXT: store i64 %idx, i64* %1, align 8, !invariant.group !0
69+
; CHECK-NEXT: %exitcond = icmp eq i64 %iv, %n
70+
; CHECK-NEXT: br i1 %exitcond, label %for.cond.cleanup, label %for.body
71+
; CHECK-NEXT: }
72+
73+
; Expected reverse pass for @subsum.
; The induction variable starts at %n and is decremented by one per iteration
; until it reaches 0. Each iteration reads the saved index from %tapeArg
; (instead of re-loading it through %off), then adds %differeturn into the
; shadow element x'[index]. When the counter reaches 0 the tape buffer is
; released with free and the function returns.
; CHECK: define internal void @diffesubsum(i64** %off, i64** %"off'", double* nocapture readonly %x, double* nocapture %"x'", i64 %n, double %differeturn, i64* %tapeArg)
74+
; CHECK-NEXT: entry:
75+
; CHECK-NEXT: br label %invertfor.body
76+
77+
; CHECK: invertentry: ; preds = %invertfor.body
78+
; CHECK-NEXT: %0 = bitcast i64* %tapeArg to i8*
79+
; CHECK-NEXT: tail call void @free(i8* nonnull %0)
80+
; CHECK-NEXT: ret void
81+
82+
; CHECK: invertfor.body: ; preds = %incinvertfor.body, %entry
83+
; CHECK-NEXT: %"iv'ac.0" = phi i64 [ %n, %entry ], [ %7, %incinvertfor.body ]
84+
; CHECK-NEXT: %1 = getelementptr inbounds i64, i64* %tapeArg, i64 %"iv'ac.0"
85+
; CHECK-NEXT: %2 = load i64, i64* %1, align 8, !invariant.group !1
86+
; CHECK-NEXT: %"arrayidx'ipg_unwrap" = getelementptr inbounds double, double* %"x'", i64 %2
87+
; CHECK-NEXT: %3 = load double, double* %"arrayidx'ipg_unwrap", align 8
88+
; CHECK-NEXT: %4 = fadd fast double %3, %differeturn
89+
; CHECK-NEXT: store double %4, double* %"arrayidx'ipg_unwrap", align 8
90+
; CHECK-NEXT: %5 = icmp eq i64 %"iv'ac.0", 0
91+
; CHECK-NEXT: %6 = select{{( fast)?}} i1 %5, double 0.000000e+00, double %differeturn
92+
; CHECK-NEXT: br i1 %5, label %invertentry, label %incinvertfor.body
93+
94+
; CHECK: incinvertfor.body: ; preds = %invertfor.body
95+
; CHECK-NEXT: %7 = add nsw i64 %"iv'ac.0", -1
96+
; CHECK-NEXT: br label %invertfor.body
97+
; CHECK-NEXT: }

0 commit comments

Comments
 (0)