1
+ ; RUN: %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -sroa -instsimplify -simplifycfg -adce -S | FileCheck %s
2
+
3
+ ; #include <stdio.h>
4
+
5
+ ; double __enzyme_fwddiff(void*, ...);
6
+
7
+ ; __attribute__((noinline))
8
+ ; void square_(const double* src, double* dest) {
9
+ ; *dest = *src * *src;
10
+ ; }
11
+
12
+ ; double square(double x) {
13
+ ; double y;
14
+ ; square_(&x, &y);
15
+ ; return y;
16
+ ; }
17
+
18
+ ; double dsquare(double x) {
19
+ ; return __enzyme_fwddiff((void*)square, x, 1.0);
20
+ ; }
21
+
22
+
23
+ define dso_local void @square_ (double * nocapture readonly %src , double * nocapture %dest ) local_unnamed_addr #0 { ; writes (*%src) * (*%src) into *%dest; IR form of the C helper `square_` quoted at the top of this file
24
+ entry:
25
+ %0 = load double , double * %src , align 8 ; read the input value once; it is used as both multiplicands
26
+ %mul = fmul double %0 , %0 ; square the loaded value
27
+ store double %mul , double * %dest , align 8 ; the result is returned only through %dest
28
+ ret void
29
+ }
30
+
31
+ define dso_local double @square (double %x ) #1 { ; returns the value @square_ stores for input %x, passing both input and output through stack slots
32
+ entry:
33
+ %x.addr = alloca double , align 8 ; stack slot holding %x so its address can be passed to @square_
34
+ %y = alloca double , align 8 ; stack slot that receives the result from @square_
35
+ store double %x , double * %x.addr , align 8 ; spill the argument into its slot
36
+ %0 = bitcast double * %y to i8* ; i8* view of %y, as required by the lifetime intrinsics
37
+ call void @llvm.lifetime.start.p0i8 (i64 8 , i8* nonnull %0 ) #4 ; mark the 8 bytes of %y live
38
+ call void @square_ (double * nonnull %x.addr , double * nonnull %y ) ; %y <- (*%x.addr) * (*%x.addr)
39
+ %1 = load double , double * %y , align 8 ; read the result back before the lifetime of %y ends
40
+ call void @llvm.lifetime.end.p0i8 (i64 8 , i8* nonnull %0 ) #4 ; mark the 8 bytes of %y dead
41
+ ret double %1
42
+ }
43
+
44
+ declare void @llvm.lifetime.start.p0i8 (i64 , i8* nocapture ) #2 ; lifetime-start intrinsic (size, pointer); called in @square on the %y slot
45
+
46
+ declare void @llvm.lifetime.end.p0i8 (i64 , i8* nocapture ) #2 ; lifetime-end intrinsic (size, pointer); called in @square after %y is read
47
+
48
+ define dso_local double @dsquare (double %x ) local_unnamed_addr #1 { ; returns the result of __enzyme_fwddiff applied to @square at %x; IR form of the C `dsquare` quoted at the top of this file
49
+ entry:
50
+ %call = tail call double (i8* , ...) @__enzyme_fwddiff (i8* bitcast (double (double )* @square to i8* ), double %x , double 1.000000e+00 ) #4 ; args: @square as i8*, the input %x, and the constant 1.0 paired with it (the %"x'" parameter of @fwddiffesquare in the CHECK lines); the FP constant must be one token
51
+ ret double %call
52
+ }
53
+
54
+ declare dso_local double @__enzyme_fwddiff (i8* , ...) local_unnamed_addr #3 ; variadic entry point called from @dsquare; the first argument is the target function cast to i8*, and no body is provided in this file
55
+
56
+ attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math" ="false" "disable-tail-calls" ="false" "less-precise-fpmad" ="false" "min-legal-vector-width" ="0" "no-frame-pointer-elim" ="false" "no-infs-fp-math" ="false" "no-jump-tables" ="false" "no-nans-fp-math" ="false" "no-signed-zeros-fp-math" ="false" "no-trapping-math" ="false" "stack-protector-buffer-size" ="8" "target-cpu" ="x86-64" "target-features" ="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math" ="false" "use-soft-float" ="false" } ; group #0: attached to @square_; same as #1 plus norecurse
57
+ attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math" ="false" "disable-tail-calls" ="false" "less-precise-fpmad" ="false" "min-legal-vector-width" ="0" "no-frame-pointer-elim" ="false" "no-infs-fp-math" ="false" "no-jump-tables" ="false" "no-nans-fp-math" ="false" "no-signed-zeros-fp-math" ="false" "no-trapping-math" ="false" "stack-protector-buffer-size" ="8" "target-cpu" ="x86-64" "target-features" ="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math" ="false" "use-soft-float" ="false" } ; group #1: attached to @square and @dsquare
58
+ attributes #2 = { argmemonly nounwind } ; group #2: attached to the two llvm.lifetime.* declarations
59
+ attributes #3 = { "correctly-rounded-divide-sqrt-fp-math" ="false" "disable-tail-calls" ="false" "less-precise-fpmad" ="false" "no-frame-pointer-elim" ="false" "no-infs-fp-math" ="false" "no-nans-fp-math" ="false" "no-signed-zeros-fp-math" ="false" "no-trapping-math" ="false" "stack-protector-buffer-size" ="8" "target-cpu" ="x86-64" "target-features" ="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math" ="false" "use-soft-float" ="false" } ; group #3: attached to the @__enzyme_fwddiff declaration
60
+ attributes #4 = { nounwind } ; group #4: call-site attributes on the lifetime calls in @square and the __enzyme_fwddiff call in @dsquare
61
+
62
+
63
+ ; CHECK: define internal double @fwddiffesquare(double %x, double %"x'")
64
+ ; CHECK-NEXT: entry:
65
+ ; CHECK-NEXT: %"x.addr'ipa" = alloca double, align 8
66
+ ; CHECK-NEXT: store double 0.000000e+00, double* %"x.addr'ipa", align 8
67
+ ; CHECK-NEXT: %x.addr = alloca double, align 8
68
+ ; CHECK-NEXT: %"y'ipa" = alloca double, align 8
69
+ ; CHECK-NEXT: store double 0.000000e+00, double* %"y'ipa", align 8
70
+ ; CHECK-NEXT: %y = alloca double, align 8
71
+ ; CHECK-NEXT: store double %x, double* %x.addr, align 8
72
+ ; CHECK-NEXT: store double %"x'", double* %"x.addr'ipa", align 8
73
+ ; CHECK-NEXT: call void @fwddiffesquare_(double* %x.addr, double* %"x.addr'ipa", double* %y, double* %"y'ipa")
74
+ ; CHECK-NEXT: %0 = load double, double* %"y'ipa", align 8
75
+ ; CHECK-NEXT: ret double %0
76
+ ; CHECK-NEXT: }
77
+
78
+ ; CHECK: define internal void @fwddiffesquare_(double* nocapture readonly %src, double* nocapture %"src'", double* nocapture %dest, double* nocapture %"dest'")
79
+ ; CHECK-NEXT: entry:
80
+ ; CHECK-NEXT: %0 = load double, double* %src, align 8
81
+ ; CHECK-NEXT: %1 = load double, double* %"src'", align 8
82
+ ; CHECK-NEXT: %mul = fmul double %0, %0
83
+ ; CHECK-NEXT: %2 = fmul fast double %1, %0
84
+ ; CHECK-NEXT: %3 = fmul fast double %1, %0
85
+ ; CHECK-NEXT: %4 = fadd fast double %2, %3
86
+ ; CHECK-NEXT: store double %mul, double* %dest, align 8
87
+ ; CHECK-NEXT: store double %4, double* %"dest'", align 8
88
+ ; CHECK-NEXT: ret void
89
+ ; CHECK-NEXT: }
0 commit comments