define i32 @add_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @add_of_reduce_add(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
-; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
-; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
-; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
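The pattern under test here (and in the mul/and/or/xor variants below) is binop(reduce(v0), reduce(v1)) -> reduce(binop(v0, v1)), which trades two reduction intrinsics for one. As a rough illustration only, a minimal InstCombine-style matcher for the add case might look like the following sketch; the function name is hypothetical, and the real transform is more general (one generic routine covering all five opcodes, plus one-use and flag handling):

```cpp
// Hypothetical, simplified sketch of the fold exercised by these tests;
// not the actual InstCombine code.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static Value *foldAddOfReduceAdd(BinaryOperator &I, IRBuilderBase &Builder) {
  Value *V0, *V1;
  // Match: add (vector.reduce.add %v0), (vector.reduce.add %v1).
  if (!match(&I,
             m_Add(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(V0)),
                   m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(V1)))))
    return nullptr;
  // Both reductions return i32, but their source vectors could still have
  // different element counts, so check the vector types match.
  if (V0->getType() != V1->getType())
    return nullptr;
  // Rewrite to: vector.reduce.add (add %v0, %v1). This is sound because
  // integer add is associative and commutative, so the sum of the
  // lane-wise sums equals the sum of the two separate reductions.
  Value *VecAdd = Builder.CreateAdd(V0, V1);
  return Builder.CreateAddReduce(VecAdd);
}
```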
@@ -31,9 +30,8 @@ define i32 @sub_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
define i32 @mul_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @mul_of_reduce_mul(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
-; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
-; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
%v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
@@ -45,9 +43,8 @@ define i32 @mul_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) {
define i32 @and_of_reduce_and(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @and_of_reduce_and(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
-; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V0]])
-; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V1]])
-; CHECK-NEXT: [[RES:%.*]] = and i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
%v0_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v0)
@@ -59,9 +56,8 @@ define i32 @and_of_reduce_and(<16 x i32> %v0, <16 x i32> %v1) {
define i32 @or_of_reduce_or(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @or_of_reduce_or(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
-; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
-; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
-; CHECK-NEXT: [[RES:%.*]] = or i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
%v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
@@ -73,9 +69,8 @@ define i32 @or_of_reduce_or(<16 x i32> %v0, <16 x i32> %v1) {
define i32 @xor_of_reduce_xor(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @xor_of_reduce_xor(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
-; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V0]])
-; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V1]])
-; CHECK-NEXT: [[RES:%.*]] = xor i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
%v0_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v0)
@@ -161,9 +156,8 @@ define i32 @multiple_use_of_reduction_1(<16 x i32> %v0, <16 x i32> %v1, ptr %p)
define i32 @do_not_preserve_overflow_flags(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @do_not_preserve_overflow_flags(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
-; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
-; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
-; CHECK-NEXT: [[RES:%.*]] = add nuw nsw i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
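Note that the rewrite drops the nuw/nsw flags from the scalar add: no-overflow of the two final sums implies nothing about the individual lane-wise adds. For instance, if lane 0 holds INT_MAX in %v0 and 1 in %v1 while another lane of %v1 holds -1 (all remaining lanes zero), the two reductions are INT_MAX and 0 and their scalar add is overflow-free, yet lane 0 of the vector add signed-overflows. Carrying nsw over would therefore introduce poison where the original had none.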
@@ -175,9 +169,8 @@ define i32 @do_not_preserve_overflow_flags(<16 x i32> %v0, <16 x i32> %v1) {
define i32 @preserve_disjoint_flags(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @preserve_disjoint_flags(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
-; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
-; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
-; CHECK-NEXT: [[RES:%.*]] = or disjoint i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
%v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
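The disjoint flag on the scalar or, by contrast, can be kept on the vector or: every bit set in any lane of %v0 is also set in reduce.or(%v0), and likewise for %v1, so if the two reduced values share no set bits then no pair of corresponding lanes does either.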
@@ -189,9 +182,8 @@ define i32 @preserve_disjoint_flags(<16 x i32> %v0, <16 x i32> %v1) {
define i32 @add_of_reduce_add_vscale(<vscale x 16 x i32> %v0, <vscale x 16 x i32> %v1) {
; CHECK-LABEL: define i32 @add_of_reduce_add_vscale(
; CHECK-SAME: <vscale x 16 x i32> [[V0:%.*]], <vscale x 16 x i32> [[V1:%.*]]) {
-; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V0]])
-; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V1]])
-; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <vscale x 16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
%v0_red = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %v0)
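The fold carries over unchanged to scalable vectors: the rewrite is purely lane-wise and never needs to know the element count, so <vscale x 16 x i32> is handled the same way as the fixed-width <16 x i32> cases above.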