@@ -24,23 +24,23 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
24
24
; SSE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
25
25
; SSE-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
26
26
; SSE-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
27
- ; SSE-NEXT: [[TMP7 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
28
- ; SSE-NEXT: [[TMP8 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]]
29
- ; SSE-NEXT: [[TMP9 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
30
- ; SSE-NEXT: store <4 x i32> [[TMP7 ]], ptr [[TMP9 ]], align 4
31
- ; SSE-NEXT: [[TMP11 :%.*]] = getelementptr inbounds i32, ptr [[TMP9 ]], i64 4
32
- ; SSE-NEXT: store <4 x i32> [[TMP8 ]], ptr [[TMP11 ]], align 4
27
+ ; SSE-NEXT: [[TMP5 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
28
+ ; SSE-NEXT: [[TMP6 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]]
29
+ ; SSE-NEXT: [[TMP7 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
30
+ ; SSE-NEXT: store <4 x i32> [[TMP5 ]], ptr [[TMP7 ]], align 4
31
+ ; SSE-NEXT: [[TMP8 :%.*]] = getelementptr inbounds i32, ptr [[TMP7 ]], i64 4
32
+ ; SSE-NEXT: store <4 x i32> [[TMP6 ]], ptr [[TMP8 ]], align 4
33
33
; SSE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
34
- ; SSE-NEXT: [[TMP13 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
35
- ; SSE-NEXT: br i1 [[TMP13 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
34
+ ; SSE-NEXT: [[TMP9 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
35
+ ; SSE-NEXT: br i1 [[TMP9 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
36
36
; SSE: middle.block:
37
37
; SSE-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
38
38
; SSE: scalar.ph:
39
39
; SSE-NEXT: br label [[FOR_BODY:%.*]]
40
40
; SSE: for.cond.cleanup:
41
41
; SSE-NEXT: ret void
42
42
; SSE: for.body:
43
- ; SSE-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2 :![0-9]+]]
43
+ ; SSE-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
44
44
;
45
45
; AVX1-LABEL: @foo(
46
46
; AVX1-NEXT: entry:
@@ -72,29 +72,29 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
72
72
; AVX1-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
73
73
; AVX1-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
74
74
; AVX1-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <8 x i32> [[WIDE_VEC3]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
75
- ; AVX1-NEXT: [[TMP15 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
76
- ; AVX1-NEXT: [[TMP16 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
77
- ; AVX1-NEXT: [[TMP17 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
78
- ; AVX1-NEXT: [[TMP18 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
79
- ; AVX1-NEXT: [[TMP19 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
80
- ; AVX1-NEXT: store <4 x i32> [[TMP15 ]], ptr [[TMP19 ]], align 4
81
- ; AVX1-NEXT: [[TMP21 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 4
82
- ; AVX1-NEXT: store <4 x i32> [[TMP16 ]], ptr [[TMP21 ]], align 4
83
- ; AVX1-NEXT: [[TMP23 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 8
84
- ; AVX1-NEXT: store <4 x i32> [[TMP17 ]], ptr [[TMP23 ]], align 4
85
- ; AVX1-NEXT: [[TMP25 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 12
86
- ; AVX1-NEXT: store <4 x i32> [[TMP18 ]], ptr [[TMP25 ]], align 4
75
+ ; AVX1-NEXT: [[TMP11 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
76
+ ; AVX1-NEXT: [[TMP12 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
77
+ ; AVX1-NEXT: [[TMP13 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
78
+ ; AVX1-NEXT: [[TMP14 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
79
+ ; AVX1-NEXT: [[TMP15 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
80
+ ; AVX1-NEXT: store <4 x i32> [[TMP11 ]], ptr [[TMP15 ]], align 4
81
+ ; AVX1-NEXT: [[TMP16 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 4
82
+ ; AVX1-NEXT: store <4 x i32> [[TMP12 ]], ptr [[TMP16 ]], align 4
83
+ ; AVX1-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 8
84
+ ; AVX1-NEXT: store <4 x i32> [[TMP13 ]], ptr [[TMP17 ]], align 4
85
+ ; AVX1-NEXT: [[TMP18 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 12
86
+ ; AVX1-NEXT: store <4 x i32> [[TMP14 ]], ptr [[TMP18 ]], align 4
87
87
; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
88
- ; AVX1-NEXT: [[TMP27 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
89
- ; AVX1-NEXT: br i1 [[TMP27 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
88
+ ; AVX1-NEXT: [[TMP19 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
89
+ ; AVX1-NEXT: br i1 [[TMP19 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
90
90
; AVX1: middle.block:
91
91
; AVX1-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
92
92
; AVX1: scalar.ph:
93
93
; AVX1-NEXT: br label [[FOR_BODY:%.*]]
94
94
; AVX1: for.cond.cleanup:
95
95
; AVX1-NEXT: ret void
96
96
; AVX1: for.body:
97
- ; AVX1-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2 :![0-9]+]]
97
+ ; AVX1-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
98
98
;
99
99
; AVX2-LABEL: @foo(
100
100
; AVX2-NEXT: entry:
@@ -126,29 +126,29 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
126
126
; AVX2-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
127
127
; AVX2-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
128
128
; AVX2-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <16 x i32> [[WIDE_VEC3]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
129
- ; AVX2-NEXT: [[TMP15 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
130
- ; AVX2-NEXT: [[TMP16 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
131
- ; AVX2-NEXT: [[TMP17 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
132
- ; AVX2-NEXT: [[TMP18 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
133
- ; AVX2-NEXT: [[TMP19 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
134
- ; AVX2-NEXT: store <8 x i32> [[TMP15 ]], ptr [[TMP19 ]], align 4
135
- ; AVX2-NEXT: [[TMP21 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 8
136
- ; AVX2-NEXT: store <8 x i32> [[TMP16 ]], ptr [[TMP21 ]], align 4
137
- ; AVX2-NEXT: [[TMP23 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 16
138
- ; AVX2-NEXT: store <8 x i32> [[TMP17 ]], ptr [[TMP23 ]], align 4
139
- ; AVX2-NEXT: [[TMP25 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 24
140
- ; AVX2-NEXT: store <8 x i32> [[TMP18 ]], ptr [[TMP25 ]], align 4
129
+ ; AVX2-NEXT: [[TMP11 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
130
+ ; AVX2-NEXT: [[TMP12 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
131
+ ; AVX2-NEXT: [[TMP13 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
132
+ ; AVX2-NEXT: [[TMP14 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
133
+ ; AVX2-NEXT: [[TMP15 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
134
+ ; AVX2-NEXT: store <8 x i32> [[TMP11 ]], ptr [[TMP15 ]], align 4
135
+ ; AVX2-NEXT: [[TMP16 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 8
136
+ ; AVX2-NEXT: store <8 x i32> [[TMP12 ]], ptr [[TMP16 ]], align 4
137
+ ; AVX2-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 16
138
+ ; AVX2-NEXT: store <8 x i32> [[TMP13 ]], ptr [[TMP17 ]], align 4
139
+ ; AVX2-NEXT: [[TMP18 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 24
140
+ ; AVX2-NEXT: store <8 x i32> [[TMP14 ]], ptr [[TMP18 ]], align 4
141
141
; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
142
- ; AVX2-NEXT: [[TMP27 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
143
- ; AVX2-NEXT: br i1 [[TMP27 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
142
+ ; AVX2-NEXT: [[TMP19 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
143
+ ; AVX2-NEXT: br i1 [[TMP19 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
144
144
; AVX2: middle.block:
145
145
; AVX2-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
146
146
; AVX2: scalar.ph:
147
147
; AVX2-NEXT: br label [[FOR_BODY:%.*]]
148
148
; AVX2: for.cond.cleanup:
149
149
; AVX2-NEXT: ret void
150
150
; AVX2: for.body:
151
- ; AVX2-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2 :![0-9]+]]
151
+ ; AVX2-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
152
152
;
153
153
; ATOM-LABEL: @foo(
154
154
; ATOM-NEXT: entry:
0 commit comments