Skip to content

Commit a075e78

Browse files
authored
AMDGPU: Fix incorrectly selecting fp8/bf8 conversion intrinsics (#107291)
Attempting to codegen these intrinsics on targets that lack the instructions should fail to select. I am not sure that all of the predicates are correct. Previously the hasCvtFP8VOP1Bug predicate was a fake one: it was not connected to any subtarget feature and always returned true. Fixes: SWDEV-482274
1 parent 10ada4a commit a075e78

File tree

4 files changed

+117
-5
lines changed

4 files changed

+117
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,13 @@ def FeatureFP8ConversionInsts : SubtargetFeature<"fp8-conversion-insts",
685685
"Has fp8 and bf8 conversion instructions"
686686
>;
687687

688+
def FeatureCvtFP8VOP1Bug : SubtargetFeature<"cvt-fp8-vop1-bug",
689+
"HasCvtFP8Vop1Bug",
690+
"true",
691+
"FP8/BF8 VOP1 form of conversion to F32 is unreliable",
692+
[FeatureFP8ConversionInsts]
693+
>;
694+
688695
def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
689696
"HasPkFmacF16Inst",
690697
"true",
@@ -1439,6 +1446,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14391446
FeatureMAIInsts,
14401447
FeatureFP8Insts,
14411448
FeatureFP8ConversionInsts,
1449+
FeatureCvtFP8VOP1Bug,
14421450
FeaturePkFmacF16Inst,
14431451
FeatureAtomicFaddRtnInsts,
14441452
FeatureAtomicFaddNoRtnInsts,

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
158158
bool HasMAIInsts = false;
159159
bool HasFP8Insts = false;
160160
bool HasFP8ConversionInsts = false;
161+
bool HasCvtFP8Vop1Bug = false;
161162
bool HasPkFmacF16Inst = false;
162163
bool HasAtomicFMinFMaxF32GlobalInsts = false;
163164
bool HasAtomicFMinFMaxF64GlobalInsts = false;
@@ -1352,7 +1353,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13521353
bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
13531354

13541355
// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1355-
bool hasCvtFP8VOP1Bug() const { return true; }
1356+
bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
13561357

13571358
// \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
13581359
// no-return form.

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -582,26 +582,29 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
582582
(inst_sdwa 0, $src, 0, 0, index)
583583
>;
584584

585-
let SubtargetPredicate = isGFX9Only in {
585+
let SubtargetPredicate = HasFP8ConversionInsts in {
586586
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
587587
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
588588
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
589589
def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
590590
(V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
591591
}
592592

593-
let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
593+
let OtherPredicates = [HasNoCvtFP8VOP1Bug, HasSDWA] in { // FIXME: HasSDWA is a substitute for !gfx12
594594
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
595595
(V_CVT_F32_FP8_e32 $src)>;
596596
def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
597597
(V_CVT_F32_BF8_e32 $src)>;
598598
}
599599

600+
let OtherPredicates = [HasSDWA] in {
600601
foreach Index = [1, 2, 3] in {
601602
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
602603
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
603604
}
604-
} // End SubtargetPredicate = isGFX9Only
605+
} // End OtherPredicates = [HasSDWA]
606+
607+
} // End SubtargetPredicate = HasFP8ConversionInsts
605608

606609
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
607610
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
@@ -611,7 +614,7 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
611614
(inst_e32 $src))
612615
>;
613616

614-
let SubtargetPredicate = isGFX9Only in {
617+
let SubtargetPredicate = HasFP8ConversionInsts, OtherPredicates = [HasSDWA] in {
615618
foreach Index = [0, -1] in {
616619
def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
617620
V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
; RUN: split-file %s %t
2+
3+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s
4+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s
5+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s
6+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s
7+
8+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s
9+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s
10+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s
11+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s
12+
13+
14+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s
15+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s
16+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s
17+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s
18+
19+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s
20+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s
21+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s
22+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s
23+
24+
25+
26+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s
27+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s
28+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s
29+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s
30+
31+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s
32+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s
33+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s
34+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s
35+
36+
37+
;--- fp8-byte0-err.ll
38+
; ERR-FP8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8
39+
; ERR-FP8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0
40+
41+
define float @test_cvt_f32_fp8_byte0(i32 %a) {
42+
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
43+
ret float %ret
44+
}
45+
46+
;--- fp8-byte1-err.ll
47+
; ERR-FP8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8
48+
; ERR-FP8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1
49+
define float @test_cvt_f32_fp8_byte1(i32 %a) {
50+
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
51+
ret float %ret
52+
}
53+
54+
;--- bf8-byte0-err.ll
55+
; ERR-BF8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8
56+
; ERR-BF8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0
57+
define float @test_cvt_f32_bf8_byte0(i32 %a) {
58+
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
59+
ret float %ret
60+
}
61+
62+
;--- bf8-byte1-err.ll
63+
; ERR-BF8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8
64+
; ERR-BF8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1
65+
define float @test_cvt_f32_bf8_byte1(i32 %a) {
66+
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
67+
ret float %ret
68+
}
69+
70+
;--- pk-fp8-word0-err.ll
71+
; ERR-PK-FP8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8
72+
; ERR-PK-FP8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0
73+
define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
74+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
75+
ret <2 x float> %ret
76+
}
77+
78+
;--- pk-fp8-word1-err.ll
79+
; ERR-PK-FP8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8
80+
; ERR-PK-FP8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1
81+
define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
82+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
83+
ret <2 x float> %ret
84+
}
85+
86+
;--- pk-bf8-word0-err.ll
87+
; ERR-PK-BF8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8
88+
; ERR-PK-BF8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0
89+
define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
90+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
91+
ret <2 x float> %ret
92+
}
93+
94+
;--- pk-bf8-word1-err.ll
95+
; ERR-PK-BF8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8
96+
; ERR-PK-BF8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1
97+
define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
98+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
99+
ret <2 x float> %ret
100+
}

0 commit comments

Comments
 (0)