@@ -188,3 +188,99 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
188
188
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32 (float %x , i32 %r , i32 %old , i32 3 )
189
189
ret i32 %ret
190
190
}
191
+
192
+ define float @test_sext_cvt_f32_fp8 (i16 %a ) { ; Sign-extends the i16 argument to i32 and passes it to llvm.amdgcn.cvt.f32.fp8 with immediate 1.
193
+ ; GFX940-LABEL: test_sext_cvt_f32_fp8:
194
+ ; GFX940: ; %bb.0:
195
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196
+ ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
197
+ ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
198
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
199
+ ;
200
+ ; GFX12-LABEL: test_sext_cvt_f32_fp8:
201
+ ; GFX12: ; %bb.0:
202
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
203
+ ; GFX12-NEXT: s_wait_expcnt 0x0
204
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
205
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
206
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
207
+ ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
208
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
209
+ ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
210
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
211
+ %a.sext = sext i16 %a to i32 ; the checks above expect this extension to stay as a separate v_bfe_i32 (offset 0, width 16) ahead of the convert on both targets
212
+ %ret = tail call float @llvm.amdgcn.cvt.f32.fp8 (i32 %a.sext , i32 1 ) ; immediate 1 corresponds to src0_sel:BYTE_1 / op_sel:[0,1] in the checks above; presumably a byte selector, confirm against the intrinsic definition
213
+ ret float %ret
214
+ }
215
+
216
+ define float @test_sext_cvt_f32_bf8 (i16 %a ) { ; Sign-extends the i16 argument to i32 and passes it to llvm.amdgcn.cvt.f32.bf8 with immediate 1.
217
+ ; GFX940-LABEL: test_sext_cvt_f32_bf8:
218
+ ; GFX940: ; %bb.0:
219
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220
+ ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
221
+ ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
222
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
223
+ ;
224
+ ; GFX12-LABEL: test_sext_cvt_f32_bf8:
225
+ ; GFX12: ; %bb.0:
226
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
227
+ ; GFX12-NEXT: s_wait_expcnt 0x0
228
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
229
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
230
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
231
+ ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
232
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
233
+ ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
234
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
235
+ %a.sext = sext i16 %a to i32 ; the checks above expect this extension to stay as a separate v_bfe_i32 (offset 0, width 16) ahead of the convert on both targets
236
+ %ret = tail call float @llvm.amdgcn.cvt.f32.bf8 (i32 %a.sext , i32 1 ) ; immediate 1 corresponds to src0_sel:BYTE_1 / op_sel:[0,1] in the checks above; presumably a byte selector, confirm against the intrinsic definition
237
+ ret float %ret
238
+ }
239
+
240
+ define <2 x float > @test_sext_cvt_pk_f32_bf8_word1 (i16 %a ) { ; Sign-extends the i16 argument to i32 and passes it to llvm.amdgcn.cvt.pk.f32.bf8 with i1 true, returning <2 x float>.
241
+ ; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1:
242
+ ; GFX940: ; %bb.0:
243
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244
+ ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
245
+ ; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
246
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
247
+ ;
248
+ ; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
249
+ ; GFX12: ; %bb.0:
250
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
251
+ ; GFX12-NEXT: s_wait_expcnt 0x0
252
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
253
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
254
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
255
+ ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
256
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
257
+ ; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
258
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
259
+ %a.sext = sext i16 %a to i32 ; the checks above expect this extension to stay as a separate v_bfe_i32 (offset 0, width 16) ahead of the convert on both targets
260
+ %ret = tail call <2 x float > @llvm.amdgcn.cvt.pk.f32.bf8 (i32 %a.sext , i1 true ) ; i1 true corresponds to src0_sel:WORD_1 / op_sel:[1,0] in the checks above; presumably selects the upper 16-bit word, confirm against the intrinsic definition
261
+ ret <2 x float > %ret
262
+ }
263
+
264
+ define <2 x float > @test_sext_cvt_pk_f32_fp8_word0 (i16 %a ) { ; Sign-extends the i16 argument to i32 and passes it to llvm.amdgcn.cvt.pk.f32.fp8 with i1 false, returning <2 x float>.
265
+ ; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0:
266
+ ; GFX940: ; %bb.0:
267
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268
+ ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
269
+ ; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
270
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
271
+ ;
272
+ ; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
273
+ ; GFX12: ; %bb.0:
274
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
275
+ ; GFX12-NEXT: s_wait_expcnt 0x0
276
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
277
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
278
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
279
+ ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
280
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
281
+ ; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
282
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
283
+ %a.sext = sext i16 %a to i32 ; the checks above expect this extension to stay as a separate v_bfe_i32 (offset 0, width 16) ahead of the convert on both targets
284
+ %ret = tail call <2 x float > @llvm.amdgcn.cvt.pk.f32.fp8 (i32 %a.sext , i1 false ) ; with i1 false the checks above expect the plain e32 encoding with no source selector on both targets; presumably selects the lower 16-bit word, confirm against the intrinsic definition
285
+ ret <2 x float > %ret
286
+ }
0 commit comments