Skip to content

AMDGPU: Add V_CVT_F32_BF16 for gfx950 #116311

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,12 @@ def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
"Use true 16-bit registers"
>;

// Subtarget feature "bf16-cvt-insts": when enabled, sets the subtarget's
// HasBF16ConversionInsts field to true, marking the target as having the
// bf16 conversion instructions.
def FeatureBF16ConversionInsts : SubtargetFeature<"bf16-cvt-insts",
"HasBF16ConversionInsts",
"true",
"Has bf16 conversion instructions"
>;

def FeatureVOP3P : SubtargetFeature<"vop3p",
"HasVOP3PInsts",
"true",
Expand Down Expand Up @@ -1504,7 +1510,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts,
FeaturePrngInst
FeaturePrngInst,
FeatureBF16ConversionInsts
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -2144,6 +2151,9 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
// FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
// AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;

// Predicate for the bf16 conversion instructions: the codegen side queries
// Subtarget->hasBF16ConversionInsts(), and the assembler side requires
// FeatureBF16ConversionInsts.
def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">,
AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>;

def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<(all_of FeatureVOP3P)>;

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class AMDGPUSubtarget {
bool Has16BitInsts = false;
bool HasTrue16BitInsts = false;
bool EnableRealTrue16Insts = false;
bool HasBF16ConversionInsts = false;
bool HasMadMixInsts = false;
bool HasMadMacF32Insts = false;
bool HasDsSrc2Insts = false;
Expand Down Expand Up @@ -166,6 +167,10 @@ class AMDGPUSubtarget {
// supported and the support for fake True16 instructions is removed.
bool useRealTrue16Insts() const;

/// \returns the HasBF16ConversionInsts flag, i.e. whether this subtarget has
/// the bf16 conversion instructions.
bool hasBF16ConversionInsts() const { return HasBF16ConversionInsts; }

bool hasMadMixInsts() const {
return HasMadMixInsts;
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2774,6 +2774,7 @@ def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;
def VOP_F32_BF16 : VOPProfile <[f32, bf16, untyped, untyped]>;

def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@ let OtherPredicates = [UseRealTrue16Insts] in
let OtherPredicates = [UseFakeTrue16Insts] in
defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;

// v_cvt_f32_bf16 is gated on HasBF16ConversionInsts. Unlike the
// v_cvt_f32_f16 definitions above, no selection pattern operator is passed
// here.
let SubtargetPredicate = HasBF16ConversionInsts in
defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
Expand Down Expand Up @@ -1514,6 +1517,8 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let AssemblerPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;

// gfx9 real encoding of V_CVT_F32_BF16 at VOP1 opcode 0x5b.
// NOTE(review): no AssemblerPredicate is set here (compare V_MOV_B64 above);
// presumably the gating comes from the pseudo's SubtargetPredicate - confirm.
defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>;

defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
Expand Down
75 changes: 74 additions & 1 deletion llvm/test/MC/AMDGPU/gfx950_asm_vop1.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s

v_prng_b32 v5, v1
// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
Expand Down Expand Up @@ -55,3 +56,75 @@ v_prng_b32 v5, src_scc
v_prng_b32 v255, 0xaf123456
// GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, v1
// GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, v127
// GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, s1
// GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, vcc_lo
// GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, vcc_hi
// GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, ttmp15
// GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, m0
// GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, exec_lo
// GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, exec_hi
// GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, -1
// GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, 0.5
// GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, src_scc
// GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v127, 0x8000
// GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, -v1
// GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, |v1|
// GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, -|v1|
// GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16 v5, v1 clamp mul:2
// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cvt_f32_bf16_e64 v5, v1 clamp div:2
// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
110 changes: 109 additions & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,112 @@
0xfd,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf

# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
0x01,0xb7,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
0x7f,0xb7,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
0x01,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
0x6a,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
0x6b,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
0x7b,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
0x7c,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
0x7e,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
0x7f,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
0xc1,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
0xf0,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
0xfd,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00

# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20

# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00

# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20

# GFX950: v_cvt_f32_bf16_e64 v5, 0.5 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08]
0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08

# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18

# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18

# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
0x01,0xb7,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
0x7f,0xb7,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
0x01,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
0x6a,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
0x6b,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
0x7b,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
0x7c,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
0x7e,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
0x7f,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
0xc1,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
0xf0,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
0xfd,0xb6,0x0a,0x7e

# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00

# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20

# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00

# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20

# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08]
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08
Loading