Skip to content

Commit f161113

Browse files
author
Mikhail Gudim
committed
[InstCombine] Precommit a test for folding a binary op of reductions.
1 parent 207e485 commit f161113

File tree

1 file changed

+166
-0
lines changed

1 file changed

+166
-0
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
; Scalar add whose two operands are llvm.vector.reduce.add results over
; <16 x i32> inputs. The assertions expect a single vector add of %v0 and %v1
; feeding one reduce.add call.
define i32 @add_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
5+
; CHECK-LABEL: define i32 @add_of_reduce_add(
6+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[V0]], [[V1]]
8+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
9+
; CHECK-NEXT: ret i32 [[RES]]
10+
;
11+
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
12+
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
13+
%res = add i32 %v0_red, %v1_red
14+
ret i32 %res
15+
}
16+
17+
; Scalar sub of two llvm.vector.reduce.add results over <16 x i32> inputs.
; Note the reduction stays reduce.add while the binary op is sub: the
; assertions expect a vector sub of %v0 and %v1 feeding one reduce.add call.
define i32 @sub_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
18+
; CHECK-LABEL: define i32 @sub_of_reduce_add(
19+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
20+
; CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i32> [[V0]], [[V1]]
21+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
22+
; CHECK-NEXT: ret i32 [[RES]]
23+
;
24+
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
25+
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
26+
%res = sub i32 %v0_red, %v1_red
27+
ret i32 %res
28+
}
29+
30+
; Scalar mul of two llvm.vector.reduce.mul results over <16 x i32> inputs.
; The assertions expect a single vector mul of %v0 and %v1 feeding one
; reduce.mul call.
define i32 @mul_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) {
31+
; CHECK-LABEL: define i32 @mul_of_reduce_mul(
32+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
33+
; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i32> [[V0]], [[V1]]
34+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]])
35+
; CHECK-NEXT: ret i32 [[RES]]
36+
;
37+
%v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
38+
%v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
39+
%res = mul i32 %v0_red, %v1_red
40+
ret i32 %res
41+
}
42+
43+
; Scalar and of two llvm.vector.reduce.and results over <16 x i32> inputs.
; The assertions expect a single vector and of %v0 and %v1 feeding one
; reduce.and call.
define i32 @and_of_reduce_and(<16 x i32> %v0, <16 x i32> %v1) {
44+
; CHECK-LABEL: define i32 @and_of_reduce_and(
45+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
46+
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[V0]], [[V1]]
47+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP1]])
48+
; CHECK-NEXT: ret i32 [[RES]]
49+
;
50+
%v0_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v0)
51+
%v1_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v1)
52+
%res = and i32 %v0_red, %v1_red
53+
ret i32 %res
54+
}
55+
56+
; Scalar or of two llvm.vector.reduce.or results over <16 x i32> inputs.
; The assertions expect a single vector or of %v0 and %v1 feeding one
; reduce.or call.
define i32 @or_of_reduce_or(<16 x i32> %v0, <16 x i32> %v1) {
57+
; CHECK-LABEL: define i32 @or_of_reduce_or(
58+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
59+
; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i32> [[V0]], [[V1]]
60+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
61+
; CHECK-NEXT: ret i32 [[RES]]
62+
;
63+
%v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
64+
%v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1)
65+
%res = or i32 %v0_red, %v1_red
66+
ret i32 %res
67+
}
68+
69+
; Scalar xor of two llvm.vector.reduce.xor results over <16 x i32> inputs.
; The assertions expect a single vector xor of %v0 and %v1 feeding one
; reduce.xor call.
define i32 @xor_of_reduce_xor(<16 x i32> %v0, <16 x i32> %v1) {
70+
; CHECK-LABEL: define i32 @xor_of_reduce_xor(
71+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
72+
; CHECK-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[V0]], [[V1]]
73+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[TMP1]])
74+
; CHECK-NEXT: ret i32 [[RES]]
75+
;
76+
%v0_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v0)
77+
%v1_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v1)
78+
%res = xor i32 %v0_red, %v1_red
79+
ret i32 %res
80+
}
81+
82+
; Negative test: the operands are llvm.vector.reduce.mul results but the
; scalar binary op is add. The assertions expect both reduce.mul calls and the
; scalar add to remain as written.
define i32 @reduction_does_not_match_binop(<16 x i32> %v0, <16 x i32> %v1) {
83+
; CHECK-LABEL: define i32 @reduction_does_not_match_binop(
84+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
85+
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
86+
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
87+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
88+
; CHECK-NEXT: ret i32 [[RES]]
89+
;
90+
%v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
91+
%v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
92+
%res = add i32 %v0_red, %v1_red
93+
ret i32 %res
94+
}
95+
96+
; Negative test: %v0 is <16 x i32> while %v1 is <8 x i32>, so the two
; reduce.add calls operate on vectors of different element counts. The
; assertions expect both reductions and the scalar add to remain as written.
define i32 @element_counts_do_not_match(<16 x i32> %v0, <8 x i32> %v1) {
97+
; CHECK-LABEL: define i32 @element_counts_do_not_match(
98+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]]) {
99+
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
100+
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V1]])
101+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
102+
; CHECK-NEXT: ret i32 [[RES]]
103+
;
104+
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
105+
%v1_red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v1)
106+
%res = add i32 %v0_red, %v1_red
107+
ret i32 %res
108+
}
109+
110+
; Negative test: the first reduction result (%v0_red) has a second use, the
; store to %p, in addition to the scalar add. The assertions expect both
; reduce.add calls and the scalar add to remain; the only difference in the
; expected output is an explicit `align 4` on the store.
define i32 @multiple_use_of_reduction_0(<16 x i32> %v0, <16 x i32> %v1, ptr %p) {
111+
; CHECK-LABEL: define i32 @multiple_use_of_reduction_0(
112+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) {
113+
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
114+
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
115+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
116+
; CHECK-NEXT: store i32 [[V0_RED]], ptr [[P]], align 4
117+
; CHECK-NEXT: ret i32 [[RES]]
118+
;
119+
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
120+
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
121+
%res = add i32 %v0_red, %v1_red
122+
; Extra use of the first reduction result.
store i32 %v0_red, ptr %p
123+
ret i32 %res
124+
}
125+
126+
; Negative test: the second reduction result (%v1_red) has a second use, the
; store to %p, in addition to the scalar add. The assertions expect both
; reduce.add calls and the scalar add to remain; the only difference in the
; expected output is an explicit `align 4` on the store.
define i32 @multiple_use_of_reduction_1(<16 x i32> %v0, <16 x i32> %v1, ptr %p) {
127+
; CHECK-LABEL: define i32 @multiple_use_of_reduction_1(
128+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) {
129+
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
130+
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
131+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
132+
; CHECK-NEXT: store i32 [[V1_RED]], ptr [[P]], align 4
133+
; CHECK-NEXT: ret i32 [[RES]]
134+
;
135+
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
136+
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
137+
%res = add i32 %v0_red, %v1_red
138+
; Extra use of the second reduction result.
store i32 %v1_red, ptr %p
139+
ret i32 %res
140+
}
141+
142+
; Scalar add carrying `nsw nuw` over two llvm.vector.reduce.add results. The
; assertions expect a vector add of %v0 and %v1 WITHOUT nsw/nuw feeding one
; reduce.add call, i.e. the overflow flags are not carried over.
; NOTE(review): the %p parameter is declared but never referenced in the body.
define i32 @do_not_preserve_overflow_flags(<16 x i32> %v0, <16 x i32> %v1, ptr %p) {
143+
; CHECK-LABEL: define i32 @do_not_preserve_overflow_flags(
144+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) {
145+
; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[V0]], [[V1]]
146+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
147+
; CHECK-NEXT: ret i32 [[RES]]
148+
;
149+
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
150+
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
151+
; The nsw/nuw flags on this scalar add are the point of the test.
%res = add nsw nuw i32 %v0_red, %v1_red
152+
ret i32 %res
153+
}
154+
155+
; Scalar `or disjoint` over two llvm.vector.reduce.or results. The assertions
; expect a vector `or disjoint` of %v0 and %v1 feeding one reduce.or call,
; i.e. the disjoint flag is carried over to the vector op.
; NOTE(review): the %p parameter is declared but never referenced in the body.
define i32 @preserve_disjoint_flags(<16 x i32> %v0, <16 x i32> %v1, ptr %p) {
156+
; CHECK-LABEL: define i32 @preserve_disjoint_flags(
157+
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) {
158+
; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <16 x i32> [[V0]], [[V1]]
159+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
160+
; CHECK-NEXT: ret i32 [[RES]]
161+
;
162+
%v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
163+
%v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1)
164+
; The disjoint flag on this scalar or is the point of the test.
%res = or disjoint i32 %v0_red, %v1_red
165+
ret i32 %res
166+
}

0 commit comments

Comments
 (0)