Skip to content

Commit 2d3ae1b

Browse files
committed
AMDGPU: Fix incorrectly selecting fp8/bf8 conversion intrinsics
Trying to codegen these intrinsics on targets that lack the instructions should fail to select. Not sure if all the predicates are correct. We previously had a fake predicate that was not connected to any subtarget feature and was always true. Fixes: SWDEV-482274
1 parent c1667f9 commit 2d3ae1b

File tree

4 files changed

+117
-6
lines changed

4 files changed

+117
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,13 @@ def FeatureFP8ConversionInsts : SubtargetFeature<"fp8-conversion-insts",
685685
"Has fp8 and bf8 conversion instructions"
686686
>;
687687

688+
def FeatureCvtFP8VOP1Bug : SubtargetFeature<"cvt-fp8-vop1-bug",
689+
"HasCvtFP8Vop1Bug",
690+
"true",
691+
"FP8/BF8 VOP1 form of conversion to F32 is unreliable",
692+
[FeatureFP8ConversionInsts]
693+
>;
694+
688695
def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
689696
"HasPkFmacF16Inst",
690697
"true",
@@ -1444,7 +1451,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14441451
FeaturePackedFP32Ops,
14451452
FeatureMAIInsts,
14461453
FeatureFP8Insts,
1447-
FeatureFP8ConversionInsts,
1454+
FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug,
14481455
FeaturePkFmacF16Inst,
14491456
FeatureAtomicFaddRtnInsts,
14501457
FeatureAtomicFaddNoRtnInsts,
@@ -1657,7 +1664,7 @@ def FeatureISAVersion12 : FeatureSet<
16571664
FeatureFlatAtomicFaddF32Inst,
16581665
FeatureImageInsts,
16591666
FeatureExtendedImageInsts,
1660-
FeatureFP8ConversionInsts,
1667+
FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug,
16611668
FeaturePackedTID,
16621669
FeatureVcmpxPermlaneHazard,
16631670
FeatureSALUFloatInsts,

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
158158
bool HasMAIInsts = false;
159159
bool HasFP8Insts = false;
160160
bool HasFP8ConversionInsts = false;
161+
bool HasCvtFP8Vop1Bug = false;
161162
bool HasPkFmacF16Inst = false;
162163
bool HasAtomicFMinFMaxF32GlobalInsts = false;
163164
bool HasAtomicFMinFMaxF64GlobalInsts = false;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -599,8 +599,8 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
599599
(inst_sdwa 0, $src, 0, 0, index)
600600
>;
601601

602-
let SubtargetPredicate = isGFX9Only in {
603-
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
602+
let SubtargetPredicate = HasFP8ConversionInsts in {
603+
let OtherPredicates = [HasCvtFP8VOP1Bug, HasSDWA] in {
604604
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
605605
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
606606
def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
@@ -614,11 +614,14 @@ let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
614614
(V_CVT_F32_BF8_e32 $src)>;
615615
}
616616

617+
let OtherPredicates = [HasSDWA] in {
617618
foreach Index = [1, 2, 3] in {
618619
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
619620
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
620621
}
621-
} // End SubtargetPredicate = isGFX9Only
622+
} // End OtherPredicates = [HasSDWA]
623+
624+
} // End SubtargetPredicate = HasFP8ConversionInsts
622625

623626
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
624627
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
@@ -628,7 +631,7 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
628631
(inst_e32 $src))
629632
>;
630633

631-
let SubtargetPredicate = isGFX9Only in {
634+
let SubtargetPredicate = HasFP8ConversionInsts, OtherPredicates = [HasSDWA] in {
632635
foreach Index = [0, -1] in {
633636
def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
634637
V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
; RUN: split-file %s %t
2+
3+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s
4+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s
5+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s
6+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s
7+
8+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s
9+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s
10+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s
11+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s
12+
13+
14+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s
15+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s
16+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s
17+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s
18+
19+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s
20+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s
21+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s
22+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s
23+
24+
25+
26+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s
27+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s
28+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s
29+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s
30+
31+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s
32+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s
33+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s
34+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s
35+
36+
37+
;--- fp8-byte0-err.ll
38+
; ERR-FP8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8
39+
; ERR-FP8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0
40+
41+
define float @test_cvt_f32_fp8_byte0(i32 %a) {
42+
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
43+
ret float %ret
44+
}
45+
46+
;--- fp8-byte1-err.ll
47+
; ERR-FP8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8
48+
; ERR-FP8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1
49+
define float @test_cvt_f32_fp8_byte1(i32 %a) {
50+
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
51+
ret float %ret
52+
}
53+
54+
;--- bf8-byte0-err.ll
55+
; ERR-BF8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8
56+
; ERR-BF8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0
57+
define float @test_cvt_f32_bf8_byte0(i32 %a) {
58+
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
59+
ret float %ret
60+
}
61+
62+
;--- bf8-byte1-err.ll
63+
; ERR-BF8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8
64+
; ERR-BF8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1
65+
define float @test_cvt_f32_bf8_byte1(i32 %a) {
66+
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
67+
ret float %ret
68+
}
69+
70+
;--- pk-fp8-word0-err.ll
71+
; ERR-PK-FP8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8
72+
; ERR-PK-FP8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0
73+
define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
74+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
75+
ret <2 x float> %ret
76+
}
77+
78+
;--- pk-fp8-word1-err.ll
79+
; ERR-PK-FP8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8
80+
; ERR-PK-FP8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1
81+
define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
82+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
83+
ret <2 x float> %ret
84+
}
85+
86+
;--- pk-bf8-word0-err.ll
87+
; ERR-PK-BF8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8
88+
; ERR-PK-BF8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0
89+
define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
90+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
91+
ret <2 x float> %ret
92+
}
93+
94+
;--- pk-bf8-word1-err.ll
95+
; ERR-PK-BF8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8
96+
; ERR-PK-BF8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1
97+
define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
98+
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
99+
ret <2 x float> %ret
100+
}

0 commit comments

Comments
 (0)