Skip to content

[AMDGPU] Add support for v_cvt_f16_bf8 on gfx1250 #146305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-in
TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts")

// FP8/BF8 -> f16 conversion builtins, all gated on the "gfx1250-insts" feature.
// Scalar forms ("hiIi"): return half; take an int source and an int that must
// be an integer constant expression (the 'I' prefix).
// Packed forms ("V2hs"): return a vector of two halfs; take a short source.
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")

Expand Down
38 changes: 38 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,44 @@ void test_cvt_f16_fp8(global half* out, int a)
out[3] = __builtin_amdgcn_cvt_f16_fp8(a, 3);
}

// CHECK-LABEL: @test_cvt_f16_bf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP0]], i32 0)
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP2]], i64 0
// CHECK-NEXT: store half [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 2
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP3]], i32 1)
// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP5]], i64 1
// CHECK-NEXT: store half [[TMP4]], ptr addrspace(1) [[ARRAYIDX1]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP6]], i32 2)
// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP8]], i64 2
// CHECK-NEXT: store half [[TMP7]], ptr addrspace(1) [[ARRAYIDX2]], align 2
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP10:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP9]], i32 3)
// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP11]], i64 3
// CHECK-NEXT: store half [[TMP10]], ptr addrspace(1) [[ARRAYIDX3]], align 2
// CHECK-NEXT: ret void
//
void test_cvt_f16_bf8(global half* out, int a)
{
// Call the builtin once per constant selector value (0, 1, 2, 3) on the same
// source word and store each half result in the matching slot of out, so the
// emitted intrinsic call is verified for every selector.
out[0] = __builtin_amdgcn_cvt_f16_bf8(a, 0);
out[1] = __builtin_amdgcn_cvt_f16_bf8(a, 1);
out[2] = __builtin_amdgcn_cvt_f16_bf8(a, 2);
out[3] = __builtin_amdgcn_cvt_f16_bf8(a, 3);
}

// CHECK-LABEL: @test_cvt_pk_f16_fp8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
Expand Down
4 changes: 4 additions & 0 deletions clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ void test_s_monitor_sleep(short a) {
void test__builtin_amdgcn_cvt_f16_fp8(int a, int b) {
__builtin_amdgcn_cvt_f16_fp8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_fp8' must be a constant integer}}
}

// Negative test: the second argument is the non-constant parameter b, so the
// call must be rejected with the "must be a constant integer" diagnostic.
void test__builtin_amdgcn_cvt_f16_bf8(int a, int b) {
__builtin_amdgcn_cvt_f16_bf8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_bf8' must be a constant integer}}
}
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -3515,6 +3515,12 @@ def int_amdgcn_cvt_f16_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_fp8">,
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;

// llvm.amdgcn.cvt.f16.bf8 half vdst, int srcA, imm byte_sel [0..3]
// Exposed to Clang as __builtin_amdgcn_cvt_f16_bf8. Produces a half from two
// i32 operands; operand 1 (byte_sel) is marked ImmArg, so it must be an
// immediate, and IntrNoMem declares that the intrinsic does not access memory.
def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">,
DefaultAttrsIntrinsic<[llvm_half_ty],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;

//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4597,6 +4597,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_sr_bf16_f32:
case Intrinsic::amdgcn_cvt_sr_f16_f32:
case Intrinsic::amdgcn_cvt_f16_fp8:
case Intrinsic::amdgcn_cvt_f16_bf8:
case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:
case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:
case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,8 @@ let SubtargetPredicate = isGFX1250Plus in {
let mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
defm V_CVT_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_f16_fp8",
V_CVT_F16_F8_Profile, V_CVT_F16_F8_True16_Profile, V_CVT_F16_F8_Fake16_Profile>;
defm V_CVT_F16_BF8 : VOP1Inst_t16_with_profiles<"v_cvt_f16_bf8",
V_CVT_F16_F8_Profile, V_CVT_F16_F8_True16_Profile, V_CVT_F16_F8_Fake16_Profile>;
defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8",
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
int_amdgcn_cvt_pk_f16_fp8>;
Expand All @@ -757,9 +759,11 @@ let SubtargetPredicate = isGFX1250Plus in {

let True16Predicate = UseRealTrue16Insts in {
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_t16_e64, 1>;
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_t16_e64, 1>;
}
let True16Predicate = UseFakeTrue16Insts in {
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_fake16_e64, 1>;
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_fake16_e64, 1>;
}
} // End SubtargetPredicate = isGFX1250Plus

Expand Down Expand Up @@ -1099,6 +1103,7 @@ defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_c
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;
defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>;

//===----------------------------------------------------------------------===//
// GFX10.
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ v_cvt_f32_bf16 v5, src_scc
v_cvt_f32_bf16 v127, 0x8000
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]

v_cvt_f16_bf8 v1, v2
// GFX1250: v_cvt_f16_bf8_e32 v1, v2 ; encoding: [0x02,0xf1,0x02,0x7e]

v_cvt_f16_bf8 v1, s2
// GFX1250: v_cvt_f16_bf8_e32 v1, s2 ; encoding: [0x02,0xf0,0x02,0x7e]

v_cvt_f16_bf8 v1, 2
// GFX1250: v_cvt_f16_bf8_e32 v1, 2 ; encoding: [0x82,0xf0,0x02,0x7e]

v_cvt_f16_bf8 v1, 0x1234
// GFX1250: v_cvt_f16_bf8_e32 v1, 0x1234 ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00]

v_cvt_f16_fp8 v1, v2
// GFX1250: v_cvt_f16_fp8_e32 v1, v2 ; encoding: [0x02,0xef,0x02,0x7e]

Expand Down
15 changes: 15 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@ v_cvt_f32_bf16 v127, 0x8000
v_cvt_f32_bf16 v5, v1.h
// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]

v_cvt_f16_bf8 v1.l, v2
// GFX1250: v_cvt_f16_bf8_e32 v1.l, v2 ; encoding: [0x02,0xf1,0x02,0x7e]

v_cvt_f16_bf8 v1.l, s2
// GFX1250: v_cvt_f16_bf8_e32 v1.l, s2 ; encoding: [0x02,0xf0,0x02,0x7e]

v_cvt_f16_bf8 v1.l, 2
// GFX1250: v_cvt_f16_bf8_e32 v1.l, 2 ; encoding: [0x82,0xf0,0x02,0x7e]

v_cvt_f16_bf8 v1.l, 0x1234
// GFX1250: v_cvt_f16_bf8_e32 v1.l, 0x1234 ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00]

v_cvt_f16_bf8 v1.h, v2
// GFX1250: v_cvt_f16_bf8_e32 v1.h, v2 ; encoding: [0x02,0xf1,0x02,0x7f]

v_cvt_f16_fp8 v1.l, v2
// GFX1250: v_cvt_f16_fp8_e32 v1.l, v2 ; encoding: [0x02,0xef,0x02,0x7e]

Expand Down
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_fp8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_fp8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1.h, v2 quad_perm:[0,1,2,3]
// GFX1250: v_cvt_f16_bf8_dpp v1.h, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7f,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_fp8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_fp8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@ v_cvt_f32_bf16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1.h, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f16_bf8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,33 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1]
v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
// GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00]

v_cvt_f16_bf8 v150, v2
// GFX1250: v_cvt_f16_bf8_e64 v150, v2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v150, s2
// GFX1250: v_cvt_f16_bf8_e64 v150, s2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00]

v_cvt_f16_bf8 v150, 2
// GFX1250: v_cvt_f16_bf8_e64 v150, 2 ; encoding: [0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00]

v_cvt_f16_bf8 v150, 0x1234
// GFX1250: v_cvt_f16_bf8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00]

v_cvt_f16_bf8 v1, v2 byte_sel:2
// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:2 ; encoding: [0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v1, v2 byte_sel:1
// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v1, v2 byte_sel:3
// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:3 ; encoding: [0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v128, v2 op_sel:[0,1]
// GFX1250: v_cvt_f16_bf8_e64 v128, v2 op_sel:[0,1] ; encoding: [0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v1, v2 op_sel:[0,1] byte_sel:2
// GFX1250: v_cvt_f16_bf8_e64 v1, v2 op_sel:[0,1] byte_sel:2 ; encoding: [0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_fp8 v150, v2
// GFX1250: v_cvt_f16_fp8_e64 v150, v2 ; encoding: [0x96,0x00,0xf7,0xd5,0x02,0x01,0x00,0x00]

Expand Down
27 changes: 27 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,33 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
v_cvt_f32_bf16_e64 v5, v128.h
// GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00]

v_cvt_f16_bf8 v150.l, v2
// GFX1250: v_cvt_f16_bf8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v150.l, s2
// GFX1250: v_cvt_f16_bf8_e64 v150.l, s2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00]

v_cvt_f16_bf8 v150.l, 2
// GFX1250: v_cvt_f16_bf8_e64 v150.l, 2 ; encoding: [0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00]

v_cvt_f16_bf8 v150.l, 0x1234
// GFX1250: v_cvt_f16_bf8_e64 v150.l, 0x1234 ; encoding: [0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00]

v_cvt_f16_bf8 v1.l, v2 byte_sel:2
// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:2 ; encoding: [0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v1.l, v2 byte_sel:1
// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v1.l, v2 byte_sel:3
// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:3 ; encoding: [0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v128.h, v2
// GFX1250: v_cvt_f16_bf8_e64 v128.h, v2 op_sel:[0,1] ; encoding: [0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_bf8 v1.h, v2 byte_sel:2
// GFX1250: v_cvt_f16_bf8_e64 v1.h, v2 op_sel:[0,1] byte_sel:2 ; encoding: [0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00]

v_cvt_f16_fp8 v150.l, v2
// GFX1250: v_cvt_f16_fp8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf7,0xd5,0x02,0x01,0x00,0x00]

Expand Down
20 changes: 20 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,26 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x10,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x18,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_bf8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_bf8 v1, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x58,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f16_fp8 v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f16_fp8_e64_dpp v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf7,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
Loading
Loading