Skip to content

Commit a006beb

Browse files
arsenm and pravinjagtap
authored and committed
AMDGPU: Add V_CVT_F32_BF16 for gfx950 (llvm#116311)
1 parent 4dbaf29 commit a006beb

File tree

6 files changed

+205
-3
lines changed

6 files changed

+205
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -438,6 +438,12 @@ def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
438438
"Use true 16-bit registers"
439439
>;
440440

441+
def FeatureBF16ConversionInsts : SubtargetFeature<"bf16-cvt-insts",
442+
"HasBF16ConversionInsts",
443+
"true",
444+
"Has bf16 conversion instructions"
445+
>;
446+
441447
def FeatureVOP3P : SubtargetFeature<"vop3p",
442448
"HasVOP3PInsts",
443449
"true",
@@ -1506,7 +1512,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
15061512
FeatureFP8ConversionInsts,
15071513
FeatureCvtFP8VOP1Bug,
15081514
FeatureGFX950Insts,
1509-
FeaturePrngInst
1515+
FeaturePrngInst,
1516+
FeatureBF16ConversionInsts
15101517
])>;
15111518

15121519
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2139,6 +2146,9 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
21392146
// FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
21402147
// AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;
21412148

2149+
def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">,
2150+
AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>;
2151+
21422152
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
21432153
AssemblerPredicate<(all_of FeatureVOP3P)>;
21442154

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ class AMDGPUSubtarget {
5151
bool Has16BitInsts = false;
5252
bool HasTrue16BitInsts = false;
5353
bool EnableRealTrue16Insts = false;
54+
bool HasBF16ConversionInsts = false;
5455
bool HasMadMixInsts = false;
5556
bool HasMadMacF32Insts = false;
5657
bool HasDsSrc2Insts = false;
@@ -166,6 +167,10 @@ class AMDGPUSubtarget {
166167
// supported and the support for fake True16 instructions is removed.
167168
bool useRealTrue16Insts() const;
168169

170+
bool hasBF16ConversionInsts() const {
171+
return HasBF16ConversionInsts;
172+
}
173+
169174
bool hasMadMixInsts() const {
170175
return HasMadMixInsts;
171176
}

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2575,6 +2575,7 @@ def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
25752575
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
25762576
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
25772577
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;
2578+
def VOP_F32_BF16 : VOPProfile <[f32, bf16, untyped, untyped]>;
25782579

25792580
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
25802581
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,9 @@ defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
304304
let OtherPredicates = [HasTrue16BitInsts] in
305305
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
306306

307+
let SubtargetPredicate = HasBF16ConversionInsts in
308+
defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
309+
307310
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
308311
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
309312
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
@@ -1453,6 +1456,8 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
14531456
let AssemblerPredicate = isGFX940Plus in
14541457
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
14551458

1459+
defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>;
1460+
14561461
defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
14571462
defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
14581463
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;

llvm/test/MC/AMDGPU/gfx950_asm_vop1.s

Lines changed: 74 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
1+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
2+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s
23

34
v_prng_b32 v5, v1
45
// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
@@ -55,3 +56,75 @@ v_prng_b32 v5, src_scc
5556
v_prng_b32 v255, 0xaf123456
5657
// GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
5758
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
59+
60+
v_cvt_f32_bf16 v5, v1
61+
// GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
62+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
63+
64+
v_cvt_f32_bf16 v5, v127
65+
// GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
66+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
67+
68+
v_cvt_f32_bf16 v5, s1
69+
// GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
70+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
71+
72+
v_cvt_f32_bf16 v5, vcc_lo
73+
// GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
74+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
75+
76+
v_cvt_f32_bf16 v5, vcc_hi
77+
// GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
78+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
79+
80+
v_cvt_f32_bf16 v5, ttmp15
81+
// GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
82+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
83+
84+
v_cvt_f32_bf16 v5, m0
85+
// GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
86+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
87+
88+
v_cvt_f32_bf16 v5, exec_lo
89+
// GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
90+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
91+
92+
v_cvt_f32_bf16 v5, exec_hi
93+
// GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
94+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
95+
96+
v_cvt_f32_bf16 v5, -1
97+
// GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
98+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
99+
100+
v_cvt_f32_bf16 v5, 0.5
101+
// GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
102+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
103+
104+
v_cvt_f32_bf16 v5, src_scc
105+
// GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
106+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
107+
108+
v_cvt_f32_bf16 v127, 0x8000
109+
// GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
110+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
111+
112+
v_cvt_f32_bf16 v5, -v1
113+
// GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
114+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
115+
116+
v_cvt_f32_bf16 v5, |v1|
117+
// GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
118+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
119+
120+
v_cvt_f32_bf16 v5, -|v1|
121+
// GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
122+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
123+
124+
v_cvt_f32_bf16 v5, v1 clamp mul:2
125+
// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08]
126+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
127+
128+
v_cvt_f32_bf16_e64 v5, v1 clamp div:2
129+
// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
130+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt

Lines changed: 109 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,4 +40,112 @@
4040
0xfd,0xb0,0x0a,0x7e
4141

4242
# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
43-
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
43+
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
44+
45+
# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
46+
0x01,0xb7,0x0a,0x7e
47+
48+
# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
49+
0x7f,0xb7,0x0a,0x7e
50+
51+
# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
52+
0x01,0xb6,0x0a,0x7e
53+
54+
# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
55+
0x6a,0xb6,0x0a,0x7e
56+
57+
# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
58+
0x6b,0xb6,0x0a,0x7e
59+
60+
# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
61+
0x7b,0xb6,0x0a,0x7e
62+
63+
# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
64+
0x7c,0xb6,0x0a,0x7e
65+
66+
# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
67+
0x7e,0xb6,0x0a,0x7e
68+
69+
# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
70+
0x7f,0xb6,0x0a,0x7e
71+
72+
# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
73+
0xc1,0xb6,0x0a,0x7e
74+
75+
# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
76+
0xf0,0xb6,0x0a,0x7e
77+
78+
# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
79+
0xfd,0xb6,0x0a,0x7e
80+
81+
# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
82+
0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00
83+
84+
# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
85+
0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20
86+
87+
# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
88+
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00
89+
90+
# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
91+
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20
92+
93+
# GFX950: v_cvt_f32_bf16_e64 v5, 0.5 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08]
94+
0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08
95+
96+
# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
97+
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18
98+
99+
# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
100+
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18
101+
102+
# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
103+
0x01,0xb7,0x0a,0x7e
104+
105+
# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
106+
0x7f,0xb7,0x0a,0x7e
107+
108+
# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
109+
0x01,0xb6,0x0a,0x7e
110+
111+
# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
112+
0x6a,0xb6,0x0a,0x7e
113+
114+
# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
115+
0x6b,0xb6,0x0a,0x7e
116+
117+
# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
118+
0x7b,0xb6,0x0a,0x7e
119+
120+
# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
121+
0x7c,0xb6,0x0a,0x7e
122+
123+
# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
124+
0x7e,0xb6,0x0a,0x7e
125+
126+
# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
127+
0x7f,0xb6,0x0a,0x7e
128+
129+
# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
130+
0xc1,0xb6,0x0a,0x7e
131+
132+
# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
133+
0xf0,0xb6,0x0a,0x7e
134+
135+
# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
136+
0xfd,0xb6,0x0a,0x7e
137+
138+
# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
139+
0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00
140+
141+
# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
142+
0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20
143+
144+
# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
145+
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00
146+
147+
# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
148+
0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20
149+
150+
# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08]
151+
0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08

0 commit comments

Comments
 (0)