1
- ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
1
+ ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX906 %s
2
+ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX900 %s
2
3
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
3
4
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s
4
5
5
6
; GCN-LABEL: mixlo_simple:
6
7
; GCN: s_waitcnt
7
- ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
8
+ ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
9
+ ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2{{$}}
8
10
; GFX9-NEXT: s_setpc_b64
9
11
10
12
; CIVI: v_mac_f32_e32
@@ -16,7 +18,8 @@ define half @mixlo_simple(float %src0, float %src1, float %src2) #0 {
16
18
}
17
19
18
20
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f16lo:
19
- ; GFX9: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
21
+ ; GFX900: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
22
+ ; GFX906: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
20
23
; CI: v_mac_f32
21
24
; CIVI: v_cvt_f16_f32
22
25
define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo (half %src0 , half %src1 , half %src2 ) #0 {
@@ -30,7 +33,8 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src
30
33
31
34
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32:
32
35
; GCN: s_waitcnt
33
- ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
36
+ ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
37
+ ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
34
38
; GFX9-NEXT: s_setpc_b64
35
39
36
40
; CIVI: v_mac_f32
@@ -44,7 +48,8 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2
44
48
45
49
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
46
50
; GCN: s_waitcnt
47
- ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
51
+ ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
52
+ ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
48
53
; GFX9-NEXT: s_setpc_b64
49
54
50
55
; CIVI: v_mac_f32_e32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]$}}
@@ -60,7 +65,8 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr
60
65
61
66
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
62
67
; GCN: s_waitcnt
63
- ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
68
+ ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
69
+ ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
64
70
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
65
71
; GFX9-NEXT: s_setpc_b64
66
72
@@ -79,8 +85,13 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src
79
85
; operation only clobbers relevant lane.
80
86
; GCN-LABEL: {{^}}v_mad_mix_v2f32:
81
87
; GCN: s_waitcnt
82
- ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
83
- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
88
+
89
+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
90
+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
91
+
92
+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
93
+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
94
+
84
95
; GFX9-NEXT: v_mov_b32_e32 v0, v3
85
96
; GFX9-NEXT: s_setpc_b64
86
97
define <2 x half > @v_mad_mix_v2f32 (<2 x half > %src0 , <2 x half > %src1 , <2 x half > %src2 ) #0 {
@@ -94,9 +105,14 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half
94
105
95
106
; GCN-LABEL: {{^}}v_mad_mix_v3f32:
96
107
; GCN: s_waitcnt
97
- ; GFX9-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
98
- ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
99
- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
108
+ ; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
109
+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
110
+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
111
+
112
+ ; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
113
+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
114
+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
115
+
100
116
; GFX9-NEXT: v_mov_b32_e32 v0, v3
101
117
; GFX9-NEXT: s_setpc_b64
102
118
define <3 x half > @v_mad_mix_v3f32 (<3 x half > %src0 , <3 x half > %src1 , <3 x half > %src2 ) #0 {
@@ -110,10 +126,16 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half
110
126
111
127
; GCN-LABEL: {{^}}v_mad_mix_v4f32:
112
128
; GCN: s_waitcnt
113
- ; GFX9-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
114
- ; GFX9-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
115
- ; GFX9-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
116
- ; GFX9-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
129
+ ; GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
130
+ ; GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
131
+ ; GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
132
+ ; GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
133
+
134
+ ; GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
135
+ ; GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
136
+ ; GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
137
+ ; GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
138
+
117
139
; GFX9-NEXT: v_mov_b32_e32 v0, v7
118
140
; GFX9-NEXT: v_mov_b32_e32 v1, v6
119
141
; GFX9-NEXT: s_setpc_b64
@@ -128,8 +150,12 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half
128
150
129
151
; FIXME: Fold clamp
130
152
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt:
131
- ; GFX9: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
132
- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
153
+ ; GFX900: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
154
+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
155
+
156
+ ; GFX906: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
157
+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
158
+
133
159
; GFX9-NEXT: v_mov_b32_e32 v0, v3
134
160
; GFX9-NEXT: s_setpc_b64
135
161
define <2 x half > @v_mad_mix_v2f32_clamp_postcvt (<2 x half > %src0 , <2 x half > %src1 , <2 x half > %src2 ) #0 {
@@ -146,9 +172,15 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s
146
172
; FIXME: Should be packed into 2 registers per argument?
147
173
; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt:
148
174
; GCN: s_waitcnt
149
- ; GFX9-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v0, v2, v4 op_sel_hi:[1,1,1] clamp
150
- ; GFX9-DAG: v_mad_mixhi_f16 v{{[0-9]+}}, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
151
- ; GFX9-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v1, v3, v5 op_sel_hi:[1,1,1]
175
+ ; GFX900-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v0, v2, v4 op_sel_hi:[1,1,1] clamp
176
+ ; GFX900-DAG: v_mad_mixhi_f16 v{{[0-9]+}}, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
177
+ ; GFX900-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v1, v3, v5 op_sel_hi:[1,1,1]
178
+
179
+ ; GFX906-DAG: v_fma_mixlo_f16 v{{[0-9]+}}, v0, v2, v4 op_sel_hi:[1,1,1] clamp
180
+ ; GFX906-DAG: v_fma_mixhi_f16 v{{[0-9]+}}, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
181
+ ; GFX906-DAG: v_fma_mixlo_f16 v{{[0-9]+}}, v1, v3, v5 op_sel_hi:[1,1,1]
182
+
183
+
152
184
; GFX9-DAG: v_pk_max_f16 v1, v1, v1 clamp
153
185
; GFX9: v_mov_b32_e32 v0, v{{[0-9]+}}
154
186
; GFX9-NEXT: s_setpc_b64
@@ -165,10 +197,18 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
165
197
166
198
; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_postcvt:
167
199
; GCN: s_waitcnt
168
- ; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
169
- ; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
170
- ; GFX9-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
171
- ; GFX9-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
200
+ ; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
201
+ ; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
202
+ ; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
203
+ ; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
204
+
205
+
206
+ ; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
207
+ ; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
208
+ ; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
209
+ ; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
210
+
211
+
172
212
; GFX9-NEXT: v_mov_b32_e32 v0, v6
173
213
; GFX9-NEXT: v_mov_b32_e32 v1, v2
174
214
; GFX9-NEXT: s_setpc_b64
@@ -185,8 +225,12 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
185
225
186
226
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_lo:
187
227
; GCN: s_waitcnt
188
- ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
189
- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
228
+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
229
+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
230
+
231
+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
232
+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
233
+
190
234
; GFX9-NEXT: v_mov_b32_e32 v0, v3
191
235
; GFX9-NEXT: s_setpc_b64
192
236
define <2 x half > @v_mad_mix_v2f32_clamp_postcvt_lo (<2 x half > %src0 , <2 x half > %src1 , <2 x half > %src2 ) #0 {
@@ -204,8 +248,12 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
204
248
205
249
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_hi:
206
250
; GCN: s_waitcnt
207
- ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
208
- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
251
+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
252
+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
253
+
254
+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
255
+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
256
+
209
257
; GFX9-NEXT: v_mov_b32_e32 v0, v3
210
258
; GFX9-NEXT: s_setpc_b64
211
259
define <2 x half > @v_mad_mix_v2f32_clamp_postcvt_hi (<2 x half > %src0 , <2 x half > %src1 , <2 x half > %src2 ) #0 {
@@ -223,8 +271,12 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
223
271
224
272
; FIXME: Should be able to use mixlo/mixhi
225
273
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_precvt:
226
- ; GFX9: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
227
- ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
274
+ ; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
275
+ ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
276
+
277
+ ; GFX906: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
278
+ ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
279
+
228
280
; GFX9: v_cvt_f16_f32_e32 v1, v3
229
281
; GFX9: v_cvt_f16_f32_e32 v0, v0
230
282
; GFX9: v_pack_b32_f16 v0, v0, v1
@@ -243,9 +295,15 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
243
295
; FIXME: Handling undef 4th component
244
296
; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_precvt:
245
297
; GCN: s_waitcnt
246
- ; GFX9-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
247
- ; GFX9-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
248
- ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
298
+ ; GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
299
+ ; GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
300
+ ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
301
+
302
+ ; GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
303
+ ; GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
304
+ ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
305
+
306
+
249
307
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v3
250
308
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
251
309
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -263,10 +321,16 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
263
321
}
264
322
265
323
; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_precvt:
266
- ; GFX9: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
267
- ; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
268
- ; GFX9: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
269
- ; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
324
+ ; GFX900: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
325
+ ; GFX900: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
326
+ ; GFX900: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
327
+ ; GFX900: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
328
+
329
+
330
+ ; GFX906: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
331
+ ; GFX906: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
332
+ ; GFX906: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
333
+ ; GFX906: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
270
334
271
335
; GFX9: v_cvt_f16_f32
272
336
; GFX9: v_cvt_f16_f32
0 commit comments