|
1 | 1 | ; RUN: opt < %s -loop-vectorize -pass-remarks-analysis='loop-vectorize' -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s
|
2 | 2 |
|
3 | 3 | ; Verify analysis remarks are generated when interleaving is not beneficial.
|
4 |
| -; CHECK: remark: vectorization-remarks-profitable.c:5:17: the cost-model indicates that vectorization is not beneficial |
5 |
| -; CHECK: remark: vectorization-remarks-profitable.c:5:17: the cost-model indicates that interleaving is not beneficial and is explicitly disabled or interleave count is set to 1 |
6 |
| -; CHECK: remark: vectorization-remarks-profitable.c:12:17: the cost-model indicates that vectorization is not beneficial |
7 |
| -; CHECK: remark: vectorization-remarks-profitable.c:12:17: the cost-model indicates that interleaving is not beneficial |
| 4 | +; CHECK: remark: vectorization-remarks-profitable.c:4:14: the cost-model indicates that vectorization is not beneficial |
| 5 | +; CHECK: remark: vectorization-remarks-profitable.c:4:14: the cost-model indicates that interleaving is not beneficial and is explicitly disabled or interleave count is set to 1 |
| 6 | +; CHECK: remark: vectorization-remarks-profitable.c:11:14: the cost-model indicates that vectorization is not beneficial |
| 7 | +; CHECK: remark: vectorization-remarks-profitable.c:11:14: the cost-model indicates that interleaving is not beneficial |
8 | 8 |
|
9 | 9 | ; First loop.
|
10 | 10 | ; #pragma clang loop interleave(disable) unroll(disable)
|
11 | 11 | ; for(int i = 0; i < n; i++) {
|
12 |
| -; out[i] = *in[i]; |
| 12 | +; out[i] = in[i]; |
13 | 13 | ; }
|
14 | 14 |
|
15 | 15 | ; Second loop.
|
16 | 16 | ; #pragma clang loop unroll(disable)
|
17 | 17 | ; for(int i = 0; i < n; i++) {
|
18 |
| -; out[i] = *in[i]; |
| 18 | +; out[i] = in[i]; |
19 | 19 | ; }
|
20 | 20 |
|
21 |
| -; ModuleID = 'vectorization-remarks-profitable.ll' |
22 | 21 | target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
23 | 22 | target triple = "x86_64-apple-macosx10.10.0"
|
24 | 23 |
|
25 |
| -; Function Attrs: nounwind uwtable |
26 |
| -define void @do_not_interleave(float** noalias nocapture readonly %in, float* noalias nocapture %out, i32 %size) #0 { |
| 24 | +; Function Attrs: nounwind ssp uwtable |
| 25 | +define void @do_not_interleave(float* nocapture %out, float* nocapture readonly %in, i32 %n) #0 { |
27 | 26 | entry:
|
28 |
| - %cmp.4 = icmp eq i32 %size, 0, !dbg !10 |
29 |
| - br i1 %cmp.4, label %for.end, label %for.body.preheader, !dbg !11 |
| 27 | + %cmp.7 = icmp sgt i32 %n, 0, !dbg !3 |
| 28 | + br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !8 |
30 | 29 |
|
31 | 30 | for.body.preheader: ; preds = %entry
|
32 |
| - br label %for.body, !dbg !12 |
| 31 | + br label %for.body, !dbg !9 |
| 32 | + |
| 33 | +for.cond.cleanup.loopexit: ; preds = %for.body |
| 34 | + br label %for.cond.cleanup, !dbg !10 |
| 35 | + |
| 36 | +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry |
| 37 | + ret void, !dbg !10 |
33 | 38 |
|
34 | 39 | for.body: ; preds = %for.body.preheader, %for.body
|
35 | 40 | %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
36 |
| - %arrayidx = getelementptr inbounds float*, float** %in, i64 %indvars.iv, !dbg !12 |
37 |
| - %0 = bitcast float** %arrayidx to i32**, !dbg !12 |
38 |
| - %1 = load i32*, i32** %0, align 8, !dbg !12 |
39 |
| - %2 = load i32, i32* %1, align 4, !dbg !13 |
40 |
| - %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !14 |
41 |
| - %3 = bitcast float* %arrayidx2 to i32*, !dbg !15 |
42 |
| - store i32 %2, i32* %3, align 4, !dbg !15 |
43 |
| - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !11 |
44 |
| - %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !11 |
45 |
| - %exitcond = icmp eq i32 %lftr.wideiv, %size, !dbg !11 |
46 |
| - br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !11, !llvm.loop !16 |
47 |
| - |
48 |
| -for.end.loopexit: ; preds = %for.body |
49 |
| - br label %for.end, !dbg !19 |
50 |
| - |
51 |
| -for.end: ; preds = %for.end.loopexit, %entry |
52 |
| - ret void, !dbg !19 |
| 41 | + %arrayidx = getelementptr inbounds float, float* %in, i64 %indvars.iv, !dbg !9 |
| 42 | + %0 = bitcast float* %arrayidx to i32*, !dbg !9 |
| 43 | + %1 = load i32, i32* %0, align 4, !dbg !9, !tbaa !11 |
| 44 | + %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !15 |
| 45 | + %2 = bitcast float* %arrayidx2 to i32*, !dbg !16 |
| 46 | + store i32 %1, i32* %2, align 4, !dbg !16, !tbaa !11 |
| 47 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !8 |
| 48 | + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8 |
| 49 | + %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8 |
| 50 | + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17 |
53 | 51 | }
|
54 | 52 |
|
55 |
| -; Function Attrs: nounwind uwtable |
56 |
| -define void @interleave_not_profitable(float** noalias nocapture readonly %in, float* noalias nocapture %out, i32 %size) #0 { |
| 53 | +; Function Attrs: nounwind ssp uwtable |
| 54 | +define void @interleave_not_profitable(float* nocapture %out, float* nocapture readonly %in, i32 %n) #0 { |
57 | 55 | entry:
|
58 |
| - %cmp.4 = icmp eq i32 %size, 0, !dbg !20 |
59 |
| - br i1 %cmp.4, label %for.end, label %for.body, !dbg !21 |
60 |
| - |
61 |
| -for.body: ; preds = %entry, %for.body |
62 |
| - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] |
63 |
| - %arrayidx = getelementptr inbounds float*, float** %in, i64 %indvars.iv, !dbg !22 |
64 |
| - %0 = bitcast float** %arrayidx to i32**, !dbg !22 |
65 |
| - %1 = load i32*, i32** %0, align 8, !dbg !22 |
66 |
| - %2 = load i32, i32* %1, align 4, !dbg !23 |
67 |
| - %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !24 |
68 |
| - %3 = bitcast float* %arrayidx2 to i32*, !dbg !25 |
69 |
| - store i32 %2, i32* %3, align 4, !dbg !25 |
70 |
| - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !21 |
71 |
| - %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21 |
72 |
| - %exitcond = icmp eq i32 %lftr.wideiv, %size, !dbg !21 |
73 |
| - br i1 %exitcond, label %for.end, label %for.body, !dbg !21, !llvm.loop !26 |
74 |
| - |
75 |
| -for.end: ; preds = %for.body, %entry |
76 |
| - ret void, !dbg !27 |
77 |
| -} |
| 56 | + %cmp.7 = icmp sgt i32 %n, 0, !dbg !20 |
| 57 | + br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !22 |
| 58 | + |
| 59 | +for.body.preheader: ; preds = %entry |
| 60 | + br label %for.body, !dbg !23 |
78 | 61 |
|
79 |
| -attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } |
80 |
| - |
81 |
| -!llvm.dbg.cu = !{!0} |
82 |
| -!llvm.module.flags = !{!7, !8} |
83 |
| -!llvm.ident = !{!9} |
84 |
| - |
85 |
| -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250016)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, subprograms: !3) |
86 |
| -!1 = !DIFile(filename: "vectorization-remarks-profitable.c", directory: "") |
87 |
| -!2 = !{} |
88 |
| -!3 = !{!4, !6} |
89 |
| -!4 = distinct !DISubprogram(name: "do_not_interleave", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, function: void (float**, float*, i32)* @do_not_interleave, variables: !2) |
90 |
| -!5 = !DISubroutineType(types: !2) |
91 |
| -!6 = distinct !DISubprogram(name: "interleave_not_profitable", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void (float**, float*, i32)* @interleave_not_profitable, variables: !2) |
92 |
| -!7 = !{i32 2, !"Dwarf Version", i32 4} |
93 |
| -!8 = !{i32 2, !"Debug Info Version", i32 3} |
94 |
| -!9 = !{!"clang version 3.8.0 (trunk 250016)"} |
95 |
| -!10 = !DILocation(line: 4, column: 23, scope: !4) |
96 |
| -!11 = !DILocation(line: 4, column: 3, scope: !4) |
97 |
| -!12 = !DILocation(line: 5, column: 17, scope: !4) |
98 |
| -!13 = !DILocation(line: 5, column: 16, scope: !4) |
99 |
| -!14 = !DILocation(line: 5, column: 7, scope: !4) |
100 |
| -!15 = !DILocation(line: 5, column: 14, scope: !4) |
101 |
| -!16 = distinct !{!16, !17, !18} |
102 |
| -!17 = !{!"llvm.loop.interleave.count", i32 1} |
103 |
| -!18 = !{!"llvm.loop.unroll.disable"} |
104 |
| -!19 = !DILocation(line: 6, column: 1, scope: !4) |
105 |
| -!20 = !DILocation(line: 11, column: 23, scope: !6) |
106 |
| -!21 = !DILocation(line: 11, column: 3, scope: !6) |
107 |
| -!22 = !DILocation(line: 12, column: 17, scope: !6) |
108 |
| -!23 = !DILocation(line: 12, column: 16, scope: !6) |
109 |
| -!24 = !DILocation(line: 12, column: 7, scope: !6) |
110 |
| -!25 = !DILocation(line: 12, column: 14, scope: !6) |
111 |
| -!26 = distinct !{!26, !18} |
112 |
| -!27 = !DILocation(line: 13, column: 1, scope: !6) |
| 62 | +for.cond.cleanup.loopexit: ; preds = %for.body |
| 63 | + br label %for.cond.cleanup, !dbg !24 |
| 64 | + |
| 65 | +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry |
| 66 | + ret void, !dbg !24 |
| 67 | + |
| 68 | +for.body: ; preds = %for.body.preheader, %for.body |
| 69 | + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] |
| 70 | + %arrayidx = getelementptr inbounds float, float* %in, i64 %indvars.iv, !dbg !23 |
| 71 | + %0 = bitcast float* %arrayidx to i32*, !dbg !23 |
| 72 | + %1 = load i32, i32* %0, align 4, !dbg !23, !tbaa !11 |
| 73 | + %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !25 |
| 74 | + %2 = bitcast float* %arrayidx2 to i32*, !dbg !26 |
| 75 | + store i32 %1, i32* %2, align 4, !dbg !26, !tbaa !11 |
| 76 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !22 |
| 77 | + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !22 |
| 78 | + %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !22 |
| 79 | + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !22, !llvm.loop !27 |
| 80 | +} |
113 | 81 |
|
| 82 | +attributes #0 = { nounwind } |
| 83 | + |
| 84 | +!llvm.module.flags = !{!0, !1} |
| 85 | +!llvm.ident = !{!2} |
| 86 | + |
| 87 | +!0 = !{i32 2, !"Debug Info Version", i32 3} |
| 88 | +!1 = !{i32 1, !"PIC Level", i32 2} |
| 89 | +!2 = !{!"clang version 3.7.0"} |
| 90 | +!3 = !DILocation(line: 3, column: 20, scope: !4) |
| 91 | +!4 = distinct !DISubprogram(name: "do_not_interleave", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, function: void (float*, float*, i32)* @do_not_interleave, variables: !7) |
| 92 | +!5 = !DIFile(filename: "vectorization-remarks-profitable.c", directory: "") |
| 93 | +!6 = !DISubroutineType(types: !7) |
| 94 | +!7 = !{} |
| 95 | +!8 = !DILocation(line: 3, column: 3, scope: !4) |
| 96 | +!9 = !DILocation(line: 4, column: 14, scope: !4) |
| 97 | +!10 = !DILocation(line: 6, column: 1, scope: !4) |
| 98 | +!11 = !{!12, !12, i64 0} |
| 99 | +!12 = !{!"float", !13, i64 0} |
| 100 | +!13 = !{!"omnipotent char", !14, i64 0} |
| 101 | +!14 = !{!"Simple C/C++ TBAA"} |
| 102 | +!15 = !DILocation(line: 4, column: 5, scope: !4) |
| 103 | +!16 = !DILocation(line: 4, column: 12, scope: !4) |
| 104 | +!17 = distinct !{!17, !18, !19} |
| 105 | +!18 = !{!"llvm.loop.interleave.count", i32 1} |
| 106 | +!19 = !{!"llvm.loop.unroll.disable"} |
| 107 | +!20 = !DILocation(line: 10, column: 20, scope: !21) |
| 108 | +!21 = distinct !DISubprogram(name: "interleave_not_profitable", scope: !5, file: !5, line: 8, type: !6, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, function: void (float*, float*, i32)* @interleave_not_profitable, variables: !7) |
| 109 | +!22 = !DILocation(line: 10, column: 3, scope: !21) |
| 110 | +!23 = !DILocation(line: 11, column: 14, scope: !21) |
| 111 | +!24 = !DILocation(line: 13, column: 1, scope: !21) |
| 112 | +!25 = !DILocation(line: 11, column: 5, scope: !21) |
| 113 | +!26 = !DILocation(line: 11, column: 12, scope: !21) |
| 114 | +!27 = distinct !{!27, !19} |
0 commit comments