1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2
2
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
3
3
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4
- ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4
+ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
5
5
6
6
define <2 x i16 > @uadd_sat_v2i16 (<2 x i16 > %arg0 , <2 x i16 > %arg1 ) {
7
7
; GFX7-LABEL: @uadd_sat_v2i16(
@@ -21,6 +21,11 @@ define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
21
21
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
22
22
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
23
23
;
24
+ ; GFX9-LABEL: @uadd_sat_v2i16(
25
+ ; GFX9-NEXT: bb:
26
+ ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
27
+ ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
28
+ ;
24
29
bb:
25
30
%arg0.0 = extractelement <2 x i16 > %arg0 , i64 0
26
31
%arg0.1 = extractelement <2 x i16 > %arg0 , i64 1
@@ -51,6 +56,11 @@ define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
51
56
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
52
57
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
53
58
;
59
+ ; GFX9-LABEL: @usub_sat_v2i16(
60
+ ; GFX9-NEXT: bb:
61
+ ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
62
+ ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
63
+ ;
54
64
bb:
55
65
%arg0.0 = extractelement <2 x i16 > %arg0 , i64 0
56
66
%arg0.1 = extractelement <2 x i16 > %arg0 , i64 1
@@ -81,6 +91,11 @@ define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
81
91
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
82
92
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
83
93
;
94
+ ; GFX9-LABEL: @sadd_sat_v2i16(
95
+ ; GFX9-NEXT: bb:
96
+ ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
97
+ ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
98
+ ;
84
99
bb:
85
100
%arg0.0 = extractelement <2 x i16 > %arg0 , i64 0
86
101
%arg0.1 = extractelement <2 x i16 > %arg0 , i64 1
@@ -111,6 +126,11 @@ define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
111
126
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
112
127
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
113
128
;
129
+ ; GFX9-LABEL: @ssub_sat_v2i16(
130
+ ; GFX9-NEXT: bb:
131
+ ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
132
+ ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
133
+ ;
114
134
bb:
115
135
%arg0.0 = extractelement <2 x i16 > %arg0 , i64 0
116
136
%arg0.1 = extractelement <2 x i16 > %arg0 , i64 1
@@ -252,6 +272,18 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
252
272
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
253
273
; GFX8-NEXT: ret <3 x i16> [[INS_2]]
254
274
;
275
+ ; GFX9-LABEL: @uadd_sat_v3i16(
276
+ ; GFX9-NEXT: bb:
277
+ ; GFX9-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
278
+ ; GFX9-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
279
+ ; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
280
+ ; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
281
+ ; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
282
+ ; GFX9-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
283
+ ; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
284
+ ; GFX9-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
285
+ ; GFX9-NEXT: ret <3 x i16> [[INS_2]]
286
+ ;
255
287
bb:
256
288
%arg0.0 = extractelement <3 x i16 > %arg0 , i64 0
257
289
%arg0.1 = extractelement <3 x i16 > %arg0 , i64 1
@@ -291,19 +323,25 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
291
323
;
292
324
; GFX8-LABEL: @uadd_sat_v4i16(
293
325
; GFX8-NEXT: bb:
294
- ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2
295
- ; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
296
- ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2
297
- ; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
298
- ; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
299
- ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
326
+ ; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
327
+ ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
300
328
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
301
- ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302
- ; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
303
- ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
304
- ; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
305
- ; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
306
- ; GFX8-NEXT: ret <4 x i16> [[INS_3]]
329
+ ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
330
+ ; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
331
+ ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
332
+ ; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
333
+ ; GFX8-NEXT: ret <4 x i16> [[INS_31]]
334
+ ;
335
+ ; GFX9-LABEL: @uadd_sat_v4i16(
336
+ ; GFX9-NEXT: bb:
337
+ ; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
338
+ ; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
339
+ ; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
340
+ ; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
341
+ ; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
342
+ ; GFX9-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
343
+ ; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
344
+ ; GFX9-NEXT: ret <4 x i16> [[INS_31]]
307
345
;
308
346
bb:
309
347
%arg0.0 = extractelement <4 x i16 > %arg0 , i64 0
0 commit comments