Skip to content

AMDGPU: MC support for v_cvt_scalef32_pk32_f32_[fp|bf]6 of gfx950 #117590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -402,11 +402,17 @@ def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts",
"Has fp4 conversion scale instructions"
>;

def FeatureFP6BF6ConversionScaleInsts : SubtargetFeature<"fp6bf6-cvt-scale-insts",
"HasFP6BF6ConversionScaleInsts",
"true",
"Has fp6 and bf6 conversion scale instructions"
>;

def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
"GFX950Insts",
"true",
"Additional instructions for GFX950+",
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts]
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts, FeatureFP6BF6ConversionScaleInsts]
>;

def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
Expand Down Expand Up @@ -1552,7 +1558,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureBitOp3Insts,
FeatureFP8ConversionScaleInsts,
FeatureBF8ConversionScaleInsts,
FeatureFP4ConversionScaleInsts
FeatureFP4ConversionScaleInsts,
FeatureFP6BF6ConversionScaleInsts
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -2435,6 +2442,9 @@ def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInst
def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>;

def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureFP6BF6ConversionScaleInsts)>;

def HasGDS : Predicate<"Subtarget->hasGDS()">;

def HasGWS : Predicate<"Subtarget->hasGWS()">;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class AMDGPUSubtarget {
bool HasFP8ConversionScaleInsts = false;
bool HasBF8ConversionScaleInsts = false;
bool HasFP4ConversionScaleInsts = false;
bool HasFP6BF6ConversionScaleInsts = false;
bool EnableRealTrue16Insts = false;
bool HasBF16ConversionInsts = false;
bool HasMadMixInsts = false;
Expand Down Expand Up @@ -184,6 +185,8 @@ class AMDGPUSubtarget {

bool hasFP4ConversionScaleInsts() const { return HasFP4ConversionScaleInsts; }

bool hasFP6BF6ConversionScaleInsts() const { return HasFP6BF6ConversionScaleInsts; }

bool hasMadMacF32Insts() const {
return HasMadMacF32Insts || !isGCN();
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1530,6 +1530,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
case OPWV232: return VReg_64RegClassID;
case OPW96: return VReg_96RegClassID;
case OPW128: return VReg_128RegClassID;
case OPW192: return VReg_192RegClassID;
case OPW160: return VReg_160RegClassID;
case OPW256: return VReg_256RegClassID;
case OPW288: return VReg_288RegClassID;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ class AMDGPUDisassembler : public MCDisassembler {
OPW96,
OPW128,
OPW160,
OPW192,
OPW256,
OPW288,
OPW320,
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1696,7 +1696,8 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
VOPDstOperand_t16Lo128),
VOPDstOperand<VGPR_32>);
RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand<VReg_1024>,
!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
!eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
!eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
!eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
Expand Down Expand Up @@ -1752,7 +1753,8 @@ class getSOPSrcForVT<ValueType VT> {
// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
RegisterOperand ret =
!cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
!cond(!eq(VT.Size, 192) : RegisterOperand<VReg_192>,
!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
!eq(VT.Size, 96) : RegisterOperand<VReg_96>,
!eq(VT.Size, 64) : RegisterOperand<VReg_64>,
!eq(VT.Size, 48) : RegisterOperand<VReg_64>,
Expand Down Expand Up @@ -1785,6 +1787,7 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
!eq(VT, v2i16) : VSrc_v2b16,
!eq(VT, v4f16) : AVSrc_64,
!eq(VT, v4bf16) : AVSrc_64,
!eq(VT.Size, 192) : VRegSrc_192,
!eq(VT.Size, 128) : VRegSrc_128,
!eq(VT.Size, 96) : VRegSrc_96,
!eq(VT.Size, 64) : VSrc_b64,
Expand Down Expand Up @@ -2828,6 +2831,9 @@ def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>;
def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>;
def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>;

def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,7 @@ def VRegSrc_32 : SrcReg9<VGPR_32, "OPW32">;
def VRegSrc_64 : SrcReg9<VReg_64, "OPW64">;
def VRegSrc_96 : SrcReg9<VReg_96, "OPW96">;
def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
def VRegSrc_192: SrcReg9<VReg_192, "OPW192">;
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;

Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,19 @@ def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f
let HasOMod = 0;
}

class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
let HasModifiers = 0;
let HasSrc0IntMods = 0;
let HasSrc1IntMods = 0;
let HasOMod = 0;
let HasOpSel = 0;
let HasClamp = 0;
let HasExtDPP = 0;
let HasExt32BitDPP = 0;
let HasExtVOP3DPP = 0;
let HasExt64BitDPP = 0;
}

let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
Expand All @@ -950,6 +963,11 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
}

let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>>;
defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>>;
}

let SubtargetPredicate = isGFX10Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
Expand Down Expand Up @@ -1894,3 +1912,7 @@ defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
}
let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;
defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, "v_cvt_scalef32_pk32_f32_bf6">;
}
10 changes: 9 additions & 1 deletion llvm/test/MC/AMDGPU/gfx950_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -884,4 +884,12 @@ v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00]
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6
24 changes: 24 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,27 @@ v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 mul:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 mul:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp div:2
6 changes: 6 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -611,3 +611,9 @@

# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00

# GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00]
0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00

# GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00
Loading