3
3
4
4
; Global array of 65536 zero-initialized floats, 16-byte aligned; the loads and stores in the test functions below index into it.
@a = dso_local local_unnamed_addr global [65536 x float ] zeroinitializer , align 16
5
5
6
- ; Equivalent C code for the test case :
6
+ ; Generated from the following C code :
7
7
; #define LEN 256 * 256
8
8
; float a[LEN];
9
-
9
+ ;
10
10
; void different_strides() {
11
11
; for (int i = 0; i < LEN - 1024 - 255; i++) {
12
12
; #pragma clang loop interleave(disable)
15
15
; a[i + j + 1024] += a[j * 4 + i];
16
16
; }
17
17
; }
18
- define dso_local void @different_strides () local_unnamed_addr {
19
- ; CHECK-LABEL: 'different_strides'
20
- ; CHECK-NEXT: for.body4:
18
+ ; The load and store have different strides (16 and 4 bytes respectively) but the store
19
+ ; is always at a safe positive distance away from the load, thus BackwardVectorizable.
20
+ define dso_local void @different_strides_backward_vectorizable () local_unnamed_addr {
; Two-level loop nest over @a: for each outer i in [0, 64257) the inner loop runs
; j in [0, 256), loading a[4*j + i] and accumulating it into a[i + j + 1024].
; The CHECK lines pin the analysis output expected for inner.body and outer.header.
21
+ ; CHECK-LABEL: 'different_strides_backward_vectorizable'
22
+ ; CHECK-NEXT: inner.body:
21
23
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 2048 bits
22
24
; CHECK-NEXT: Dependences:
23
25
; CHECK-NEXT: BackwardVectorizable:
@@ -35,7 +37,82 @@ define dso_local void @different_strides() local_unnamed_addr {
35
37
; CHECK-NEXT: SCEV assumptions:
36
38
; CHECK-EMPTY:
37
39
; CHECK-NEXT: Expressions re-written:
38
- ; CHECK-NEXT: for.cond1.preheader:
40
+ ; CHECK-NEXT: outer.header:
41
+ ; CHECK-NEXT: Report: loop is not the innermost loop
42
+ ; CHECK-NEXT: Dependences:
43
+ ; CHECK-NEXT: Run-time memory checks:
44
+ ; CHECK-NEXT: Grouped accesses:
45
+ ; CHECK-EMPTY:
46
+ ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
47
+ ; CHECK-NEXT: SCEV assumptions:
48
+ ; CHECK-EMPTY:
49
+ ; CHECK-NEXT: Expressions re-written:
50
+ ;
51
+ entry:
52
+ br label %outer.header
53
+
54
+ outer.header:
55
+ %i = phi i64 [ 0 , %entry ], [ %i.next , %outer.exit ] ; outer induction variable, starts at 0
56
+ %0 = add nuw nsw i64 %i , 1024 ; i + 1024, computed once per outer iteration for the store index
57
+ br label %inner.body
58
+
59
+ inner.body:
60
+ %j = phi i64 [ 0 , %outer.header ], [ %j.next , %inner.body ] ; inner induction variable, restarts at 0 for every i
61
+ %1 = shl nuw nsw i64 %j , 2 ; 4 * j
62
+ %2 = add nuw nsw i64 %1 , %i ; index of the strided read: 4*j + i
63
+ %arrayidx = getelementptr inbounds [65536 x float ], ptr @a , i64 0 , i64 %2
64
+ %3 = load float , ptr %arrayidx , align 4 ; a[4*j + i]
65
+ %4 = add nuw nsw i64 %0 , %j ; index of the read-modify-write: i + 1024 + j
66
+ %arrayidx8 = getelementptr inbounds [65536 x float ], ptr @a , i64 0 , i64 %4
67
+ %5 = load float , ptr %arrayidx8 , align 4 ; a[i + j + 1024]
68
+ %add9 = fadd fast float %5 , %3
69
+ store float %add9 , ptr %arrayidx8 , align 4 ; a[i + j + 1024] += a[4*j + i]
70
+ %j.next = add nuw nsw i64 %j , 1
71
+ %exitcond.not = icmp eq i64 %j.next , 256 ; inner loop leaves after 256 iterations
72
+ br i1 %exitcond.not , label %outer.exit , label %inner.body
73
+
74
+ outer.exit:
75
+ %i.next = add nuw nsw i64 %i , 1
76
+ %outerexitcond.not = icmp eq i64 %i.next , 64257 ; outer loop leaves after 64257 iterations
77
+ br i1 %outerexitcond.not , label %exit , label %outer.header
78
+
79
+ exit:
80
+ ret void
81
+ }
82
+
83
+
84
+ ; Generated from the following C code:
85
+ ; void different_stride_and_not_vectorizable(){
86
+ ; for(int i = 0; i < LEN; i++){
87
+ ; for(int j = 0 ; j < 256; j++){
88
+ ; a[i + j + 256] += a[i + 4*j];
89
+ ; }
90
+ ; }
91
+ ; }
92
+ ; The load and store have different strides, but the store and load are not at a
93
+ ; safe distance away from each other, thus not safe for vectorization.
94
+ define dso_local void @different_stride_and_not_vectorizable () local_unnamed_addr {
; Two-level loop nest over @a: for each outer i in [0, 65536) the inner loop runs
; j in [0, 256), loading a[4*j + i] and accumulating it into a[i + j + 256].
; The CHECK lines expect an Unknown dependence from the strided load to the store
; and a Forward dependence from the same-address load to the store.
95
+ ; CHECK-LABEL: 'different_stride_and_not_vectorizable'
96
+ ; CHECK-NEXT: inner.body:
97
+ ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
98
+ ; CHECK-NEXT: Unknown data dependence.
99
+ ; CHECK-NEXT: Dependences:
100
+ ; CHECK-NEXT: Unknown:
101
+ ; CHECK-NEXT: %3 = load float, ptr %arrayidx, align 4 ->
102
+ ; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
103
+ ; CHECK-EMPTY:
104
+ ; CHECK-NEXT: Forward:
105
+ ; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 ->
106
+ ; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
107
+ ; CHECK-EMPTY:
108
+ ; CHECK-NEXT: Run-time memory checks:
109
+ ; CHECK-NEXT: Grouped accesses:
110
+ ; CHECK-EMPTY:
111
+ ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
112
+ ; CHECK-NEXT: SCEV assumptions:
113
+ ; CHECK-EMPTY:
114
+ ; CHECK-NEXT: Expressions re-written:
115
+ ; CHECK-NEXT: outer.header:
39
116
; CHECK-NEXT: Report: loop is not the innermost loop
40
117
; CHECK-NEXT: Dependences:
41
118
; CHECK-NEXT: Run-time memory checks:
@@ -47,34 +124,33 @@ define dso_local void @different_strides() local_unnamed_addr {
47
124
; CHECK-NEXT: Expressions re-written:
48
125
;
49
126
entry:
50
- br label %for.cond1.preheader
127
+ br label %outer.header
51
128
52
- for.cond1.preheader:
53
- %indvars.iv25 = phi i64 [ 0 , %entry ], [ %indvars.iv.next26 , %for.cond.cleanup3 ]
54
- %0 = add nuw nsw i64 %indvars.iv25 , 1024
55
- br label %for.body4
129
+ outer.header:
130
+ %i = phi i64 [ 0 , %entry ], [ %i.next , %outer.exit ] ; outer induction variable, starts at 0
131
+ %0 = add nuw nsw i64 %i , 256 ; i + 256, computed once per outer iteration for the store index
132
+ br label %inner.body
56
133
57
- for.cond.cleanup:
134
+ exit:
58
135
ret void
59
136
60
- for.cond.cleanup3:
61
- %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25 , 1
62
- %exitcond29.not = icmp eq i64 %indvars.iv.next26 , 64257
63
- br i1 %exitcond29.not , label %for.cond.cleanup , label %for.cond1.preheader
137
+ outer.exit:
138
+ %i.next = add nuw nsw i64 %i , 1
139
+ %exitcond29.not = icmp eq i64 %i.next , 65536 ; outer loop leaves after 65536 iterations
140
+ br i1 %exitcond29.not , label %exit , label %outer.header
64
141
65
- for.body4:
66
- %indvars.iv = phi i64 [ 0 , %for.cond1.preheader ], [ %indvars.iv.next , %for.body4 ]
67
- %1 = shl nuw nsw i64 %indvars.iv , 2
68
- %2 = add nuw nsw i64 %1 , %indvars.iv25
142
+ inner.body:
143
+ %j = phi i64 [ 0 , %outer.header ], [ %j.next , %inner.body ] ; inner induction variable, restarts at 0 for every i
144
+ %1 = shl nuw nsw i64 %j , 2 ; 4 * j
145
+ %2 = add nuw nsw i64 %1 , %i ; index of the strided read: 4*j + i
69
146
%arrayidx = getelementptr inbounds [65536 x float ], ptr @a , i64 0 , i64 %2
70
147
%3 = load float , ptr %arrayidx , align 4 ; a[4*j + i]
71
- %4 = add nuw nsw i64 %0 , %indvars.iv
148
+ %4 = add nuw nsw i64 %0 , %j ; index of the read-modify-write: i + 256 + j
72
149
%arrayidx8 = getelementptr inbounds [65536 x float ], ptr @a , i64 0 , i64 %4
73
150
%5 = load float , ptr %arrayidx8 , align 4 ; a[i + j + 256]
74
151
%add9 = fadd fast float %5 , %3
75
152
store float %add9 , ptr %arrayidx8 , align 4 ; a[i + j + 256] += a[4*j + i]
76
- %indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
77
- %exitcond.not = icmp eq i64 %indvars.iv.next , 256
78
- br i1 %exitcond.not , label %for.cond.cleanup3 , label %for.body4
153
+ %j.next = add nuw nsw i64 %j , 1
154
+ %exitcond.not = icmp eq i64 %j.next , 256 ; inner loop leaves after 256 iterations
155
+ br i1 %exitcond.not , label %outer.exit , label %inner.body
79
156
}
80
-
0 commit comments