Skip to content

Commit 80b2365

Browse files
committed
AMDGPU: Add baseline test for vectorize of integer min/max
1 parent dd9c04c commit 80b2365

File tree

1 file changed

+366
-0
lines changed
  • llvm/test/Transforms/SLPVectorizer/AMDGPU

1 file changed

+366
-0
lines changed
Lines changed: 366 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
3+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
5+
6+
; Lane-wise unsigned minimum of two <2 x i16> vectors, written in scalarized
; form: both lanes are extracted, fed to @llvm.umin.i16, and re-inserted.
; NOTE: despite the "uadd_sat" function name and the %add.* value names, the
; operation under test is llvm.umin, not a saturating add.
; Checked output: GFX7 keeps the scalar calls; GFX8 and GFX9 form a single
; @llvm.umin.v2i16 call.
define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
7+
; GFX7-LABEL: @uadd_sat_v2i16(
8+
; GFX7-NEXT: bb:
9+
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
10+
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
11+
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
12+
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
13+
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
14+
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
15+
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
16+
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
17+
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
18+
;
19+
; GFX8-LABEL: @uadd_sat_v2i16(
20+
; GFX8-NEXT: bb:
21+
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
22+
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
23+
;
24+
; GFX9-LABEL: @uadd_sat_v2i16(
25+
; GFX9-NEXT: bb:
26+
; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
27+
; GFX9-NEXT: ret <2 x i16> [[TMP0]]
28+
;
29+
bb:
30+
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
31+
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
32+
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
33+
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
34+
%add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
35+
%add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
36+
%ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
37+
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
38+
ret <2 x i16> %ins.1
39+
}
40+
41+
; Lane-wise unsigned maximum of two <2 x i16> vectors, written in scalarized
; form: both lanes are extracted, fed to @llvm.umax.i16, and re-inserted.
; NOTE: despite the "usub_sat" function name and the %add.* value names, the
; operation under test is llvm.umax, not a saturating subtract.
; Checked output: GFX7 keeps the scalar calls; GFX8 and GFX9 form a single
; @llvm.umax.v2i16 call.
define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
42+
; GFX7-LABEL: @usub_sat_v2i16(
43+
; GFX7-NEXT: bb:
44+
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
45+
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
46+
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
47+
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
48+
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
49+
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
50+
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
51+
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
52+
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
53+
;
54+
; GFX8-LABEL: @usub_sat_v2i16(
55+
; GFX8-NEXT: bb:
56+
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
57+
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
58+
;
59+
; GFX9-LABEL: @usub_sat_v2i16(
60+
; GFX9-NEXT: bb:
61+
; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
62+
; GFX9-NEXT: ret <2 x i16> [[TMP0]]
63+
;
64+
bb:
65+
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
66+
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
67+
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
68+
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
69+
%add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0)
70+
%add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
71+
%ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
72+
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
73+
ret <2 x i16> %ins.1
74+
}
75+
76+
; Lane-wise signed minimum of two <2 x i16> vectors, written in scalarized
; form: both lanes are extracted, fed to @llvm.smin.i16, and re-inserted.
; NOTE: despite the "sadd_sat" function name and the %add.* value names, the
; operation under test is llvm.smin, not a saturating add.
; Checked output: GFX7 keeps the scalar calls; GFX8 and GFX9 form a single
; @llvm.smin.v2i16 call.
define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
77+
; GFX7-LABEL: @sadd_sat_v2i16(
78+
; GFX7-NEXT: bb:
79+
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
80+
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
81+
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
82+
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
83+
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
84+
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
85+
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
86+
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
87+
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
88+
;
89+
; GFX8-LABEL: @sadd_sat_v2i16(
90+
; GFX8-NEXT: bb:
91+
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
92+
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
93+
;
94+
; GFX9-LABEL: @sadd_sat_v2i16(
95+
; GFX9-NEXT: bb:
96+
; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
97+
; GFX9-NEXT: ret <2 x i16> [[TMP0]]
98+
;
99+
bb:
100+
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
101+
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
102+
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
103+
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
104+
%add.0 = call i16 @llvm.smin.i16(i16 %arg0.0, i16 %arg1.0)
105+
%add.1 = call i16 @llvm.smin.i16(i16 %arg0.1, i16 %arg1.1)
106+
%ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
107+
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
108+
ret <2 x i16> %ins.1
109+
}
110+
111+
; Lane-wise signed maximum of two <2 x i16> vectors, written in scalarized
; form: both lanes are extracted, fed to @llvm.smax.i16, and re-inserted.
; NOTE: despite the "ssub_sat" function name and the %add.* value names, the
; operation under test is llvm.smax, not a saturating subtract.
; Checked output: GFX7 keeps the scalar calls; GFX8 and GFX9 form a single
; @llvm.smax.v2i16 call.
define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
112+
; GFX7-LABEL: @ssub_sat_v2i16(
113+
; GFX7-NEXT: bb:
114+
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
115+
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
116+
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
117+
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
118+
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
119+
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
120+
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
121+
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
122+
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
123+
;
124+
; GFX8-LABEL: @ssub_sat_v2i16(
125+
; GFX8-NEXT: bb:
126+
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
127+
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
128+
;
129+
; GFX9-LABEL: @ssub_sat_v2i16(
130+
; GFX9-NEXT: bb:
131+
; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
132+
; GFX9-NEXT: ret <2 x i16> [[TMP0]]
133+
;
134+
bb:
135+
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
136+
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
137+
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
138+
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
139+
%add.0 = call i16 @llvm.smax.i16(i16 %arg0.0, i16 %arg1.0)
140+
%add.1 = call i16 @llvm.smax.i16(i16 %arg0.1, i16 %arg1.1)
141+
%ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
142+
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
143+
ret <2 x i16> %ins.1
144+
}
145+
146+
; Lane-wise unsigned minimum of two <2 x i32> vectors, written in scalarized
; form: both lanes are extracted, fed to @llvm.umin.i32, and re-inserted.
; NOTE: despite the "uadd_sat" function name and the %add.* value names, the
; operation under test is llvm.umin, not a saturating add.
; Checked output: the shared GCN prefix expects the scalar calls to remain
; on all three RUN configurations (no vector intrinsic is formed).
define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
147+
; GCN-LABEL: @uadd_sat_v2i32(
148+
; GCN-NEXT: bb:
149+
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
150+
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
151+
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
152+
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
153+
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
154+
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
155+
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
156+
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
157+
; GCN-NEXT: ret <2 x i32> [[INS_1]]
158+
;
159+
bb:
160+
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
161+
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
162+
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
163+
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
164+
%add.0 = call i32 @llvm.umin.i32(i32 %arg0.0, i32 %arg1.0)
165+
%add.1 = call i32 @llvm.umin.i32(i32 %arg0.1, i32 %arg1.1)
166+
%ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
167+
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
168+
ret <2 x i32> %ins.1
169+
}
170+
171+
; Lane-wise unsigned maximum of two <2 x i32> vectors, written in scalarized
; form: both lanes are extracted, fed to @llvm.umax.i32, and re-inserted.
; NOTE: despite the "usub_sat" function name and the %add.* value names, the
; operation under test is llvm.umax, not a saturating subtract.
; Checked output: the shared GCN prefix expects the scalar calls to remain
; on all three RUN configurations (no vector intrinsic is formed).
define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
172+
; GCN-LABEL: @usub_sat_v2i32(
173+
; GCN-NEXT: bb:
174+
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
175+
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
176+
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
177+
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
178+
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
179+
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
180+
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
181+
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
182+
; GCN-NEXT: ret <2 x i32> [[INS_1]]
183+
;
184+
bb:
185+
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
186+
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
187+
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
188+
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
189+
%add.0 = call i32 @llvm.umax.i32(i32 %arg0.0, i32 %arg1.0)
190+
%add.1 = call i32 @llvm.umax.i32(i32 %arg0.1, i32 %arg1.1)
191+
%ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
192+
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
193+
ret <2 x i32> %ins.1
194+
}
195+
196+
; Lane-wise signed minimum of two <2 x i32> vectors, written in scalarized
; form: both lanes are extracted, fed to @llvm.smin.i32, and re-inserted.
; NOTE: despite the "sadd_sat" function name and the %add.* value names, the
; operation under test is llvm.smin, not a saturating add.
; Checked output: the shared GCN prefix expects the scalar calls to remain
; on all three RUN configurations (no vector intrinsic is formed).
define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
197+
; GCN-LABEL: @sadd_sat_v2i32(
198+
; GCN-NEXT: bb:
199+
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
200+
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
201+
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
202+
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
203+
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
204+
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
205+
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
206+
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
207+
; GCN-NEXT: ret <2 x i32> [[INS_1]]
208+
;
209+
bb:
210+
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
211+
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
212+
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
213+
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
214+
%add.0 = call i32 @llvm.smin.i32(i32 %arg0.0, i32 %arg1.0)
215+
%add.1 = call i32 @llvm.smin.i32(i32 %arg0.1, i32 %arg1.1)
216+
%ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
217+
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
218+
ret <2 x i32> %ins.1
219+
}
220+
221+
; Lane-wise signed maximum of two <2 x i32> vectors, written in scalarized
; form: both lanes are extracted, fed to @llvm.smax.i32, and re-inserted.
; NOTE: despite the "ssub_sat" function name and the %add.* value names, the
; operation under test is llvm.smax, not a saturating subtract.
; Checked output: the shared GCN prefix expects the scalar calls to remain
; on all three RUN configurations (no vector intrinsic is formed).
define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
222+
; GCN-LABEL: @ssub_sat_v2i32(
223+
; GCN-NEXT: bb:
224+
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
225+
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
226+
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
227+
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
228+
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
229+
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
230+
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
231+
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
232+
; GCN-NEXT: ret <2 x i32> [[INS_1]]
233+
;
234+
bb:
235+
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
236+
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
237+
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
238+
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
239+
%add.0 = call i32 @llvm.smax.i32(i32 %arg0.0, i32 %arg1.0)
240+
%add.1 = call i32 @llvm.smax.i32(i32 %arg0.1, i32 %arg1.1)
241+
%ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
242+
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
243+
ret <2 x i32> %ins.1
244+
}
245+
246+
; Lane-wise unsigned minimum of two <3 x i16> vectors, written in scalarized
; form: all three lanes are extracted, fed to @llvm.umin.i16, and re-inserted.
; NOTE: despite the "uadd_sat" function name and the %add.* value names, the
; operation under test is llvm.umin, not a saturating add.
; Checked output: GFX7 keeps the three scalar calls. GFX8 and GFX9 emit a
; @llvm.umin.v3i16 call on the whole inputs and additionally keep a scalar
; umin for lane 2, whose result is inserted over lane 2 of the vector result.
define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
247+
; GFX7-LABEL: @uadd_sat_v3i16(
248+
; GFX7-NEXT: bb:
249+
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
250+
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
251+
; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
252+
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
253+
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
254+
; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
255+
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
256+
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
257+
; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
258+
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
259+
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
260+
; GFX7-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
261+
; GFX7-NEXT: ret <3 x i16> [[INS_2]]
262+
;
263+
; GFX8-LABEL: @uadd_sat_v3i16(
264+
; GFX8-NEXT: bb:
265+
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
266+
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
267+
; GFX8-NEXT: [[TMP0:%.*]] = call <3 x i16> @llvm.umin.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
268+
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
269+
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP0]], i16 [[ADD_2]], i64 2
270+
; GFX8-NEXT: ret <3 x i16> [[INS_2]]
271+
;
272+
; GFX9-LABEL: @uadd_sat_v3i16(
273+
; GFX9-NEXT: bb:
274+
; GFX9-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
275+
; GFX9-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
276+
; GFX9-NEXT: [[TMP0:%.*]] = call <3 x i16> @llvm.umin.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
277+
; GFX9-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
278+
; GFX9-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP0]], i16 [[ADD_2]], i64 2
279+
; GFX9-NEXT: ret <3 x i16> [[INS_2]]
280+
;
281+
bb:
282+
%arg0.0 = extractelement <3 x i16> %arg0, i64 0
283+
%arg0.1 = extractelement <3 x i16> %arg0, i64 1
284+
%arg0.2 = extractelement <3 x i16> %arg0, i64 2
285+
%arg1.0 = extractelement <3 x i16> %arg1, i64 0
286+
%arg1.1 = extractelement <3 x i16> %arg1, i64 1
287+
%arg1.2 = extractelement <3 x i16> %arg1, i64 2
288+
%add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
289+
%add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
290+
%add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
291+
%ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
292+
%ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
293+
%ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
294+
ret <3 x i16> %ins.2
295+
}
296+
297+
; Lane-wise unsigned minimum of two <4 x i16> vectors, written in scalarized
; form: all four lanes are extracted, fed to @llvm.umin.i16, and re-inserted.
; NOTE: despite the "uadd_sat" function name and the %add.* value names, the
; operation under test is llvm.umin, not a saturating add.
; Checked output: GFX7 keeps the four scalar calls. GFX8 and GFX9 emit two
; @llvm.umin.v4i16 calls on the same operands and stitch the result together
; with shufflevectors: lanes 0-1 come from the first call, lanes 2-3 from the
; second.
define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
298+
; GFX7-LABEL: @uadd_sat_v4i16(
299+
; GFX7-NEXT: bb:
300+
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
301+
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
302+
; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
303+
; GFX7-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
304+
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
305+
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
306+
; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
307+
; GFX7-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
308+
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
309+
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
310+
; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
311+
; GFX7-NEXT: [[ADD_3:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
312+
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
313+
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
314+
; GFX7-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
315+
; GFX7-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
316+
; GFX7-NEXT: ret <4 x i16> [[INS_3]]
317+
;
318+
; GFX8-LABEL: @uadd_sat_v4i16(
319+
; GFX8-NEXT: bb:
320+
; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]])
321+
; GFX8-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
322+
; GFX8-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
323+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
324+
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
325+
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
326+
;
327+
; GFX9-LABEL: @uadd_sat_v4i16(
328+
; GFX9-NEXT: bb:
329+
; GFX9-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]])
330+
; GFX9-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
331+
; GFX9-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
332+
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
333+
; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
334+
; GFX9-NEXT: ret <4 x i16> [[INS_31]]
335+
;
336+
bb:
337+
%arg0.0 = extractelement <4 x i16> %arg0, i64 0
338+
%arg0.1 = extractelement <4 x i16> %arg0, i64 1
339+
%arg0.2 = extractelement <4 x i16> %arg0, i64 2
340+
%arg0.3 = extractelement <4 x i16> %arg0, i64 3
341+
%arg1.0 = extractelement <4 x i16> %arg1, i64 0
342+
%arg1.1 = extractelement <4 x i16> %arg1, i64 1
343+
%arg1.2 = extractelement <4 x i16> %arg1, i64 2
344+
%arg1.3 = extractelement <4 x i16> %arg1, i64 3
345+
%add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
346+
%add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
347+
%add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
348+
%add.3 = call i16 @llvm.umin.i16(i16 %arg0.3, i16 %arg1.3)
349+
%ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
350+
%ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
351+
%ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
352+
%ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
353+
ret <4 x i16> %ins.3
354+
}
355+
356+
; Declarations of the scalar integer min/max intrinsics (i16 and i32 variants
; of umin, umax, smin, smax) called by the test functions in this file. Every
; declaration references attribute group #0, defined at the end.
declare i16 @llvm.umin.i16(i16, i16) #0
357+
declare i16 @llvm.umax.i16(i16, i16) #0
358+
declare i16 @llvm.smin.i16(i16, i16) #0
359+
declare i16 @llvm.smax.i16(i16, i16) #0
360+
361+
declare i32 @llvm.umin.i32(i32, i32) #0
362+
declare i32 @llvm.umax.i32(i32, i32) #0
363+
declare i32 @llvm.smin.i32(i32, i32) #0
364+
declare i32 @llvm.smax.i32(i32, i32) #0
365+
366+
; Attribute group shared by all intrinsic declarations above.
attributes #0 = { nounwind readnone speculatable willreturn }

0 commit comments

Comments
 (0)