Skip to content

Commit 9a26ec7

Browse files
arsenm and pravinjagtap
authored and committed
AMDGPU: Fix incorrectly selecting fp8/bf8 conversion intrinsics (llvm#107291)
Trying to codegen these intrinsics on targets without the instructions should fail to select. Not sure if all the predicates are correct. Previously there was a fake predicate (hasCvtFP8VOP1Bug) that was not connected to any subtarget feature and always returned true. Fixes: SWDEV-482274
1 parent d284ddd commit 9a26ec7

File tree

4 files changed

+119
-5
lines changed

4 files changed

+119
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,13 @@ def FeatureFP8ConversionInsts : SubtargetFeature<"fp8-conversion-insts",
685685
"Has fp8 and bf8 conversion instructions"
686686
>;
687687

688+
def FeatureCvtFP8VOP1Bug : SubtargetFeature<"cvt-fp8-vop1-bug",
689+
"HasCvtFP8Vop1Bug",
690+
"true",
691+
"FP8/BF8 VOP1 form of conversion to F32 is unreliable",
692+
[FeatureFP8ConversionInsts]
693+
>;
694+
688695
def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
689696
"HasPkFmacF16Inst",
690697
"true",
@@ -1459,6 +1466,7 @@ def FeatureISAVersion9_4_0 : FeatureSet<
14591466
FeatureForceStoreSC0SC1,
14601467
FeatureFP8Insts,
14611468
FeatureFP8ConversionInsts,
1469+
FeatureCvtFP8VOP1Bug,
14621470
FeatureXF32Insts
14631471
])>;
14641472

@@ -1468,6 +1476,7 @@ def FeatureISAVersion9_4_1 : FeatureSet<
14681476
FeatureForceStoreSC0SC1,
14691477
FeatureFP8Insts,
14701478
FeatureFP8ConversionInsts,
1479+
FeatureCvtFP8VOP1Bug,
14711480
FeatureXF32Insts
14721481
])>;
14731482

@@ -1476,6 +1485,7 @@ def FeatureISAVersion9_4_2 : FeatureSet<
14761485
[
14771486
FeatureFP8Insts,
14781487
FeatureFP8ConversionInsts,
1488+
FeatureCvtFP8VOP1Bug,
14791489
FeatureXF32Insts
14801490
])>;
14811491

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
158158
bool HasMAIInsts = false;
159159
bool HasFP8Insts = false;
160160
bool HasFP8ConversionInsts = false;
161+
bool HasCvtFP8Vop1Bug = false;
161162
bool HasPkFmacF16Inst = false;
162163
bool HasAtomicFMinFMaxF32GlobalInsts = false;
163164
bool HasAtomicFMinFMaxF64GlobalInsts = false;
@@ -1355,7 +1356,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13551356
bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
13561357

13571358
// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1358-
bool hasCvtFP8VOP1Bug() const { return true; }
1359+
bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
13591360

13601361
// \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
13611362
// no-return form.

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -598,26 +598,29 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
598598
(inst_sdwa 0, $src, 0, 0, index)
599599
>;
600600

601-
let SubtargetPredicate = isGFX9Only in {
601+
let SubtargetPredicate = HasFP8ConversionInsts in {
602602
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
603603
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
604604
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
605605
def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
606606
(V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
607607
}
608608

609-
let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
609+
let OtherPredicates = [HasNoCvtFP8VOP1Bug, HasSDWA] in { // FIXME: HasSDWA is a substitute for !gfx12
610610
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
611611
(V_CVT_F32_FP8_e32 $src)>;
612612
def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
613613
(V_CVT_F32_BF8_e32 $src)>;
614614
}
615615

616+
let OtherPredicates = [HasSDWA] in {
616617
foreach Index = [1, 2, 3] in {
617618
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
618619
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
619620
}
620-
} // End SubtargetPredicate = isGFX9Only
621+
} // End OtherPredicates = [HasSDWA]
622+
623+
} // End SubtargetPredicate = HasFP8ConversionInsts
621624

622625
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
623626
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
@@ -627,7 +630,7 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
627630
(inst_e32 $src))
628631
>;
629632

630-
let SubtargetPredicate = isGFX9Only in {
633+
let SubtargetPredicate = HasFP8ConversionInsts, OtherPredicates = [HasSDWA] in {
631634
foreach Index = [0, -1] in {
632635
def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
633636
V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
; RUN: split-file %s %t
2+
3+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s
4+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s
5+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s
6+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s
7+
8+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s
9+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s
10+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s
11+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s
12+
13+
14+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s
15+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s
16+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s
17+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s
18+
19+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s
20+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s
21+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s
22+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s
23+
24+
25+
26+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s
27+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s
28+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s
29+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s
30+
31+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s
32+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s
33+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s
34+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s
35+
36+
37+
;--- fp8-byte0-err.ll
38+
; ERR-FP8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8
39+
; ERR-FP8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0
40+
41+
define float @test_cvt_f32_fp8_byte0(i32 %a) {
42+
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
43+
ret float %ret
44+
}
45+
46+
;--- fp8-byte1-err.ll
47+
; ERR-FP8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8
48+
; ERR-FP8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1
49+
define float @test_cvt_f32_fp8_byte1(i32 %a) {
50+
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
51+
ret float %ret
52+
}
53+
54+
;--- bf8-byte0-err.ll
55+
; ERR-BF8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8
56+
; ERR-BF8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0
57+
define float @test_cvt_f32_bf8_byte0(i32 %a) {
58+
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
59+
ret float %ret
60+
}
61+
62+
;--- bf8-byte1-err.ll
63+
; ERR-BF8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8
64+
; ERR-BF8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1
65+
define float @test_cvt_f32_bf8_byte1(i32 %a) {
66+
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
67+
ret float %ret
68+
}
69+
70+
;--- pk-fp8-word0-err.ll
71+
; ERR-PK-FP8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8
72+
; ERR-PK-FP8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0
73+
define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
74+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
75+
ret <2 x float> %ret
76+
}
77+
78+
;--- pk-fp8-word1-err.ll
79+
; ERR-PK-FP8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8
80+
; ERR-PK-FP8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1
81+
define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
82+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
83+
ret <2 x float> %ret
84+
}
85+
86+
;--- pk-bf8-word0-err.ll
87+
; ERR-PK-BF8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8
88+
; ERR-PK-BF8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0
89+
define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
90+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
91+
ret <2 x float> %ret
92+
}
93+
94+
;--- pk-bf8-word1-err.ll
95+
; ERR-PK-BF8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8
96+
; ERR-PK-BF8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1
97+
define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
98+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
99+
ret <2 x float> %ret
100+
}

0 commit comments

Comments
 (0)