Skip to content

Commit 122eb3d

Browse files
Pierre-vh authored and yanyao-wang committed
[AMDGPU] Don't form sext/abs/neg fp8 cvt (llvm#83843)
gfx940 does not allow abs/sext/neg on v_cvt_fp8/bf8 & pk variants.

Fixes SWDEV-447468

Change-Id: I818c4e029b04728bbf0fe15c5fff96c3727a7e97
1 parent 95f9a87 commit 122eb3d

File tree

4 files changed

+157
-0
lines changed

4 files changed

+157
-0
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4479,6 +4479,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
44794479
}
44804480
}
44814481

4482+
if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
4483+
Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
4484+
Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
4485+
Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
4486+
const MachineOperand *Src0ModsMO =
4487+
getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
4488+
unsigned Mods = Src0ModsMO->getImm();
4489+
if (Mods & SISrcMods::ABS || Mods & SISrcMods::NEG ||
4490+
Mods & SISrcMods::SEXT) {
4491+
ErrInfo = "sext, abs and neg are not allowed on this instruction";
4492+
return false;
4493+
}
4494+
}
4495+
44824496
uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
44834497
if (isVOPC(BasicOpcode)) {
44844498
if (!ST.hasSDWASdst() && DstIdx != -1) {

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,15 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
338338
}
339339

340340
bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
341+
switch (MI.getOpcode()) {
342+
case AMDGPU::V_CVT_F32_FP8_sdwa:
343+
case AMDGPU::V_CVT_F32_BF8_sdwa:
344+
case AMDGPU::V_CVT_PK_F32_FP8_sdwa:
345+
case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
346+
// Does not support input modifiers: noabs, noneg, nosext.
347+
return false;
348+
}
349+
341350
// Find operand in instruction that matches source operand and replace it with
342351
// target operand. Set corresponding src_sel
343352
bool IsPreserveSrc = false;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,3 +188,99 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
188188
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 3)
189189
ret i32 %ret
190190
}
191+
192+
define float @test_sext_cvt_f32_fp8(i16 %a) {
193+
; GFX940-LABEL: test_sext_cvt_f32_fp8:
194+
; GFX940: ; %bb.0:
195+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196+
; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
197+
; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
198+
; GFX940-NEXT: s_setpc_b64 s[30:31]
199+
;
200+
; GFX12-LABEL: test_sext_cvt_f32_fp8:
201+
; GFX12: ; %bb.0:
202+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
203+
; GFX12-NEXT: s_wait_expcnt 0x0
204+
; GFX12-NEXT: s_wait_samplecnt 0x0
205+
; GFX12-NEXT: s_wait_bvhcnt 0x0
206+
; GFX12-NEXT: s_wait_kmcnt 0x0
207+
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
208+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
209+
; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
210+
; GFX12-NEXT: s_setpc_b64 s[30:31]
211+
%a.sext = sext i16 %a to i32
212+
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a.sext, i32 1)
213+
ret float %ret
214+
}
215+
216+
define float @test_sext_cvt_f32_bf8(i16 %a) {
217+
; GFX940-LABEL: test_sext_cvt_f32_bf8:
218+
; GFX940: ; %bb.0:
219+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220+
; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
221+
; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
222+
; GFX940-NEXT: s_setpc_b64 s[30:31]
223+
;
224+
; GFX12-LABEL: test_sext_cvt_f32_bf8:
225+
; GFX12: ; %bb.0:
226+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
227+
; GFX12-NEXT: s_wait_expcnt 0x0
228+
; GFX12-NEXT: s_wait_samplecnt 0x0
229+
; GFX12-NEXT: s_wait_bvhcnt 0x0
230+
; GFX12-NEXT: s_wait_kmcnt 0x0
231+
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
232+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
233+
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
234+
; GFX12-NEXT: s_setpc_b64 s[30:31]
235+
%a.sext = sext i16 %a to i32
236+
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a.sext, i32 1)
237+
ret float %ret
238+
}
239+
240+
define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
241+
; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1:
242+
; GFX940: ; %bb.0:
243+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244+
; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
245+
; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
246+
; GFX940-NEXT: s_setpc_b64 s[30:31]
247+
;
248+
; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
249+
; GFX12: ; %bb.0:
250+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
251+
; GFX12-NEXT: s_wait_expcnt 0x0
252+
; GFX12-NEXT: s_wait_samplecnt 0x0
253+
; GFX12-NEXT: s_wait_bvhcnt 0x0
254+
; GFX12-NEXT: s_wait_kmcnt 0x0
255+
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
256+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
257+
; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
258+
; GFX12-NEXT: s_setpc_b64 s[30:31]
259+
%a.sext = sext i16 %a to i32
260+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a.sext, i1 true)
261+
ret <2 x float> %ret
262+
}
263+
264+
define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
265+
; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0:
266+
; GFX940: ; %bb.0:
267+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268+
; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
269+
; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
270+
; GFX940-NEXT: s_setpc_b64 s[30:31]
271+
;
272+
; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
273+
; GFX12: ; %bb.0:
274+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
275+
; GFX12-NEXT: s_wait_expcnt 0x0
276+
; GFX12-NEXT: s_wait_samplecnt 0x0
277+
; GFX12-NEXT: s_wait_bvhcnt 0x0
278+
; GFX12-NEXT: s_wait_kmcnt 0x0
279+
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
280+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
281+
; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
282+
; GFX12-NEXT: s_setpc_b64 s[30:31]
283+
%a.sext = sext i16 %a to i32
284+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a.sext, i1 false)
285+
ret <2 x float> %ret
286+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx940 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" %s
2+
3+
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
4+
# CHECK: $vgpr0 = V_CVT_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
5+
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
6+
# CHECK: $vgpr0 = V_CVT_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
7+
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
8+
# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
9+
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
10+
# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
11+
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
12+
# CHECK: $vgpr0 = V_CVT_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
13+
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
14+
# CHECK: $vgpr0 = V_CVT_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
15+
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
16+
# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
17+
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
18+
# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
19+
20+
---
21+
name: test
22+
liveins:
23+
body: |
24+
bb.0:
25+
liveins: $vgpr0, $vgpr0_vgpr1
26+
27+
; sext/neg
28+
$vgpr0 = V_CVT_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
29+
$vgpr0 = V_CVT_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
30+
$vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
31+
$vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
32+
33+
; abs
34+
$vgpr0 = V_CVT_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
35+
$vgpr0 = V_CVT_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
36+
$vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
37+
$vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
38+
...

0 commit comments

Comments
 (0)