Skip to content

Commit cb1103c

Browse files
rampitec and shiltian committed
[AMDGPU] Add support for v_cvt_f16_bf8 on gfx1250
Co-authored-by: Shilei Tian <[email protected]>
1 parent ed3e22a commit cb1103c

24 files changed: +400 -37 lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -656,6 +656,7 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8bf16, "V8yV8y*3", "nc", "gfx
656656
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
657657

658658
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
659+
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
659660
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
660661
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
661662

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,44 @@ void test_cvt_f16_fp8(global half* out, int a)
5353
out[3] = __builtin_amdgcn_cvt_f16_fp8(a, 3);
5454
}
5555

56+
// CHECK-LABEL: @test_cvt_f16_bf8(
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
59+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
60+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
61+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
62+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
63+
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
64+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
65+
// CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP0]], i32 0)
66+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
67+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP2]], i64 0
68+
// CHECK-NEXT: store half [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 2
69+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
70+
// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP3]], i32 1)
71+
// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
72+
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP5]], i64 1
73+
// CHECK-NEXT: store half [[TMP4]], ptr addrspace(1) [[ARRAYIDX1]], align 2
74+
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
75+
// CHECK-NEXT: [[TMP7:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP6]], i32 2)
76+
// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
77+
// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP8]], i64 2
78+
// CHECK-NEXT: store half [[TMP7]], ptr addrspace(1) [[ARRAYIDX2]], align 2
79+
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
80+
// CHECK-NEXT: [[TMP10:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP9]], i32 3)
81+
// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
82+
// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP11]], i64 3
83+
// CHECK-NEXT: store half [[TMP10]], ptr addrspace(1) [[ARRAYIDX3]], align 2
84+
// CHECK-NEXT: ret void
85+
//
86+
void test_cvt_f16_bf8(global half* out, int a)
87+
{
88+
out[0] = __builtin_amdgcn_cvt_f16_bf8(a, 0);
89+
out[1] = __builtin_amdgcn_cvt_f16_bf8(a, 1);
90+
out[2] = __builtin_amdgcn_cvt_f16_bf8(a, 2);
91+
out[3] = __builtin_amdgcn_cvt_f16_bf8(a, 3);
92+
}
93+
5694
// CHECK-LABEL: @test_cvt_pk_f16_fp8(
5795
// CHECK-NEXT: entry:
5896
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,3 +8,7 @@ void test_setprio_inc_wg(short a) {
88
void test__builtin_amdgcn_cvt_f16_fp8(int a, int b) {
99
__builtin_amdgcn_cvt_f16_fp8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_fp8' must be a constant integer}}
1010
}
11+
12+
void test__builtin_amdgcn_cvt_f16_bf8(int a, int b) {
13+
__builtin_amdgcn_cvt_f16_bf8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_bf8' must be a constant integer}}
14+
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3510,6 +3510,12 @@ def int_amdgcn_cvt_f16_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_fp8">,
35103510
[llvm_i32_ty, llvm_i32_ty],
35113511
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
35123512

3513+
// llvm.amdgcn.cvt.f16.bf8 half vdst, int srcA, imm byte_sel [0..3]
3514+
def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">,
3515+
DefaultAttrsIntrinsic<[llvm_half_ty],
3516+
[llvm_i32_ty, llvm_i32_ty],
3517+
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
3518+
35133519
//===----------------------------------------------------------------------===//
35143520
// Special Intrinsics for backend internal use only. No frontend
35153521
// should emit calls to these.

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4597,6 +4597,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
45974597
case Intrinsic::amdgcn_cvt_sr_bf16_f32:
45984598
case Intrinsic::amdgcn_cvt_sr_f16_f32:
45994599
case Intrinsic::amdgcn_cvt_f16_fp8:
4600+
case Intrinsic::amdgcn_cvt_f16_bf8:
46004601
case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:
46014602
case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:
46024603
case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -747,6 +747,8 @@ let SubtargetPredicate = isGFX1250Plus in {
747747
let mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
748748
defm V_CVT_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_f16_fp8",
749749
V_CVT_F16_F8_Profile, V_CVT_F16_F8_True16_Profile, V_CVT_F16_F8_Fake16_Profile>;
750+
defm V_CVT_F16_BF8 : VOP1Inst_t16_with_profiles<"v_cvt_f16_bf8",
751+
V_CVT_F16_F8_Profile, V_CVT_F16_F8_True16_Profile, V_CVT_F16_F8_Fake16_Profile>;
750752
defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8",
751753
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
752754
int_amdgcn_cvt_pk_f16_fp8>;
@@ -757,9 +759,11 @@ let SubtargetPredicate = isGFX1250Plus in {
757759

758760
let True16Predicate = UseRealTrue16Insts in {
759761
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_t16_e64, 1>;
762+
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_t16_e64, 1>;
760763
}
761764
let True16Predicate = UseFakeTrue16Insts in {
762765
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_fake16_e64, 1>;
766+
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_fake16_e64, 1>;
763767
}
764768
} // End SubtargetPredicate = isGFX1250Plus
765769

@@ -1099,6 +1103,7 @@ defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_c
10991103
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
11001104
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
11011105
defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;
1106+
defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>;
11021107

11031108
//===----------------------------------------------------------------------===//
11041109
// GFX10.

llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,18 @@ v_cvt_f32_bf16 v5, src_scc
4646
v_cvt_f32_bf16 v127, 0x8000
4747
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
4848

49+
v_cvt_f16_bf8 v1, v2
50+
// GFX1250: v_cvt_f16_bf8_e32 v1, v2 ; encoding: [0x02,0xf1,0x02,0x7e]
51+
52+
v_cvt_f16_bf8 v1, s2
53+
// GFX1250: v_cvt_f16_bf8_e32 v1, s2 ; encoding: [0x02,0xf0,0x02,0x7e]
54+
55+
v_cvt_f16_bf8 v1, 2
56+
// GFX1250: v_cvt_f16_bf8_e32 v1, 2 ; encoding: [0x82,0xf0,0x02,0x7e]
57+
58+
v_cvt_f16_bf8 v1, 0x1234
59+
// GFX1250: v_cvt_f16_bf8_e32 v1, 0x1234 ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00]
60+
4961
v_cvt_f16_fp8 v1, v2
5062
// GFX1250: v_cvt_f16_fp8_e32 v1, v2 ; encoding: [0x02,0xef,0x02,0x7e]
5163

llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,21 @@ v_cvt_f32_bf16 v127, 0x8000
4949
v_cvt_f32_bf16 v5, v1.h
5050
// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]
5151

52+
v_cvt_f16_bf8 v1.l, v2
53+
// GFX1250: v_cvt_f16_bf8_e32 v1.l, v2 ; encoding: [0x02,0xf1,0x02,0x7e]
54+
55+
v_cvt_f16_bf8 v1.l, s2
56+
// GFX1250: v_cvt_f16_bf8_e32 v1.l, s2 ; encoding: [0x02,0xf0,0x02,0x7e]
57+
58+
v_cvt_f16_bf8 v1.l, 2
59+
// GFX1250: v_cvt_f16_bf8_e32 v1.l, 2 ; encoding: [0x82,0xf0,0x02,0x7e]
60+
61+
v_cvt_f16_bf8 v1.l, 0x1234
62+
// GFX1250: v_cvt_f16_bf8_e32 v1.l, 0x1234 ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00]
63+
64+
v_cvt_f16_bf8 v1.h, v2
65+
// GFX1250: v_cvt_f16_bf8_e32 v1.h, v2 ; encoding: [0x02,0xf1,0x02,0x7f]
66+
5267
v_cvt_f16_fp8 v1.l, v2
5368
// GFX1250: v_cvt_f16_fp8_e32 v1.l, v2 ; encoding: [0x02,0xef,0x02,0x7e]
5469

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,14 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
5858
// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
5959
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
6060

61+
v_cvt_f16_bf8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
62+
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff]
63+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
64+
65+
v_cvt_f16_bf8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1
66+
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
6169
v_cvt_f16_fp8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
6270
// GFX1250: v_cvt_f16_fp8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff]
6371
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,18 @@ v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
6262
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
6363
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
6464

65+
v_cvt_f16_bf8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
66+
// GFX1250: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
69+
v_cvt_f16_bf8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1
70+
// GFX1250: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff]
71+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
72+
73+
v_cvt_f16_bf8 v1.h, v2 quad_perm:[0,1,2,3]
74+
// GFX1250: v_cvt_f16_bf8_dpp v1.h, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7f,0x02,0xe4,0x00,0xff]
75+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
76+
6577
v_cvt_f16_fp8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
6678
// GFX1250: v_cvt_f16_fp8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff]
6779
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,14 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
1414
// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
1515
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
1616

17+
v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
18+
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
19+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
20+
21+
v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
22+
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
23+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
24+
1725
v_cvt_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
1826
// GFX1250: v_cvt_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05]
1927
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,18 @@ v_cvt_f32_bf16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0]
1818
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
1919
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
2020

21+
v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
22+
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
23+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
24+
25+
v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
26+
// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
27+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
28+
29+
v_cvt_f16_bf8 v1.h, v2 dpp8:[7,6,5,4,3,2,1,0]
30+
// GFX1250: v_cvt_f16_bf8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05]
31+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
32+
2133
v_cvt_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
2234
// GFX1250: v_cvt_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05]
2335
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,33 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1]
7676
v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
7777
// GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00]
7878

79+
v_cvt_f16_bf8 v150, v2
80+
// GFX1250: v_cvt_f16_bf8_e64 v150, v2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00]
81+
82+
v_cvt_f16_bf8 v150, s2
83+
// GFX1250: v_cvt_f16_bf8_e64 v150, s2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00]
84+
85+
v_cvt_f16_bf8 v150, 2
86+
// GFX1250: v_cvt_f16_bf8_e64 v150, 2 ; encoding: [0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00]
87+
88+
v_cvt_f16_bf8 v150, 0x1234
89+
// GFX1250: v_cvt_f16_bf8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00]
90+
91+
v_cvt_f16_bf8 v1, v2 byte_sel:2
92+
// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:2 ; encoding: [0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00]
93+
94+
v_cvt_f16_bf8 v1, v2 byte_sel:1
95+
// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00]
96+
97+
v_cvt_f16_bf8 v1, v2 byte_sel:3
98+
// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:3 ; encoding: [0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00]
99+
100+
v_cvt_f16_bf8 v128, v2 op_sel:[0,1]
101+
// GFX1250: v_cvt_f16_bf8_e64 v128, v2 op_sel:[0,1] ; encoding: [0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00]
102+
103+
v_cvt_f16_bf8 v1, v2 op_sel:[0,1] byte_sel:2
104+
// GFX1250: v_cvt_f16_bf8_e64 v1, v2 op_sel:[0,1] byte_sel:2 ; encoding: [0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00]
105+
79106
v_cvt_f16_fp8 v150, v2
80107
// GFX1250: v_cvt_f16_fp8_e64 v150, v2 ; encoding: [0x96,0x00,0xf7,0xd5,0x02,0x01,0x00,0x00]
81108

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,33 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
7979
v_cvt_f32_bf16_e64 v5, v128.h
8080
// GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00]
8181

82+
v_cvt_f16_bf8 v150.l, v2
83+
// GFX1250: v_cvt_f16_bf8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00]
84+
85+
v_cvt_f16_bf8 v150.l, s2
86+
// GFX1250: v_cvt_f16_bf8_e64 v150.l, s2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00]
87+
88+
v_cvt_f16_bf8 v150.l, 2
89+
// GFX1250: v_cvt_f16_bf8_e64 v150.l, 2 ; encoding: [0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00]
90+
91+
v_cvt_f16_bf8 v150.l, 0x1234
92+
// GFX1250: v_cvt_f16_bf8_e64 v150.l, 0x1234 ; encoding: [0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00]
93+
94+
v_cvt_f16_bf8 v1.l, v2 byte_sel:2
95+
// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:2 ; encoding: [0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00]
96+
97+
v_cvt_f16_bf8 v1.l, v2 byte_sel:1
98+
// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00]
99+
100+
v_cvt_f16_bf8 v1.l, v2 byte_sel:3
101+
// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:3 ; encoding: [0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00]
102+
103+
v_cvt_f16_bf8 v128.h, v2
104+
// GFX1250: v_cvt_f16_bf8_e64 v128.h, v2 op_sel:[0,1] ; encoding: [0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00]
105+
106+
v_cvt_f16_bf8 v1.h, v2 byte_sel:2
107+
// GFX1250: v_cvt_f16_bf8_e64 v1.h, v2 op_sel:[0,1] byte_sel:2 ; encoding: [0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00]
108+
82109
v_cvt_f16_fp8 v150.l, v2
83110
// GFX1250: v_cvt_f16_fp8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf7,0xd5,0x02,0x01,0x00,0x00]
84111

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,26 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
4646
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
4747
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
4848

49+
v_cvt_f16_bf8 v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
50+
// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
51+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
52+
53+
v_cvt_f16_bf8 v1, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
54+
// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x10,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
55+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
56+
57+
v_cvt_f16_bf8 v1, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
58+
// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x18,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
59+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
60+
61+
v_cvt_f16_bf8 v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
62+
// GFX1250: v_cvt_f16_bf8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
63+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
64+
65+
v_cvt_f16_bf8 v1, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
66+
// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x58,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
4969
v_cvt_f16_fp8 v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf
5070
// GFX1250: v_cvt_f16_fp8_e64_dpp v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf7,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff]
5171
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

0 commit comments

Comments
 (0)