Skip to content

Commit 369ae61

Browse files
arsenm and pravinjagtap
committed
AMDGPU: MC support for v_cvt_scalef32_pk_{bf|f}16_{bf|fp}8 of gfx950. (llvm#117593)
OPSEL[0] selects src_word to read. Co-authored-by: Pravin Jagtap <[email protected]>
1 parent 8d78c8c commit 369ae61

File tree

4 files changed

+225
-1
lines changed

4 files changed

+225
-1
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,8 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in
948948
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
949949
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
950950
defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
951+
defm V_CVT_SCALEF32_PK_F16_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
952+
defm V_CVT_SCALEF32_PK_BF16_FP8 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
951953
}
952954

953955
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -957,6 +959,8 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
957959
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
958960
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
959961
defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
962+
defm V_CVT_SCALEF32_PK_F16_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f16_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
963+
defm V_CVT_SCALEF32_PK_BF16_BF8 : VOP3Inst<"v_cvt_scalef32_pk_bf16_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
960964
}
961965

962966
let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -1888,6 +1892,8 @@ defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
18881892
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>;
18891893
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3OpSel_Real_gfx9 <0x240>;
18901894
defm V_CVT_SCALEF32_PK_FP8_BF16: VOP3OpSel_Real_gfx9 <0x244>;
1895+
defm V_CVT_SCALEF32_PK_F16_FP8 : VOP3OpSel_Real_gfx9<0x248>;
1896+
defm V_CVT_SCALEF32_PK_BF16_FP8 : VOP3OpSel_Real_gfx9<0x269>;
18911897
}
18921898
let OtherPredicates = [HasBF8ConversionScaleInsts] in {
18931899
defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
@@ -1896,6 +1902,8 @@ defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
18961902
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
18971903
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>;
18981904
defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
1905+
defm V_CVT_SCALEF32_PK_F16_BF8 : VOP3OpSel_Real_gfx9<0x249>;
1906+
defm V_CVT_SCALEF32_PK_BF16_BF8 : VOP3OpSel_Real_gfx9<0x26a>;
18991907
}
19001908
let OtherPredicates = [HasFP4ConversionScaleInsts] in {
19011909
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>;

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,3 +929,99 @@ v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8
929929
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
930930
// GFX950: v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00]
931931
v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8
932+
933+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
934+
// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00]
935+
v_cvt_scalef32_pk_f16_fp8 v1, v2, v3
936+
937+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
938+
// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00]
939+
v_cvt_scalef32_pk_f16_fp8 v1, v2, s3
940+
941+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
942+
// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00]
943+
v_cvt_scalef32_pk_f16_fp8 v1, s2, 3
944+
945+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
946+
// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00]
947+
v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0]
948+
949+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
950+
// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00]
951+
v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0]
952+
953+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
954+
// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00]
955+
v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0]
956+
957+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
958+
// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00]
959+
v_cvt_scalef32_pk_f16_bf8 v1, v2, v3
960+
961+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
962+
// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00]
963+
v_cvt_scalef32_pk_f16_bf8 v1, v2, s3
964+
965+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
966+
// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00]
967+
v_cvt_scalef32_pk_f16_bf8 v1, s2, 3
968+
969+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
970+
// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00]
971+
v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0]
972+
973+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
974+
// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00]
975+
v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0]
976+
977+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
978+
// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00]
979+
v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0]
980+
981+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
982+
// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00]
983+
v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3
984+
985+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
986+
// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00]
987+
v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3
988+
989+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
990+
// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00]
991+
v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3
992+
993+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
994+
// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00]
995+
v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0]
996+
997+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
998+
// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00]
999+
v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0]
1000+
1001+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1002+
// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00]
1003+
v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0]
1004+
1005+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1006+
// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00]
1007+
v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3
1008+
1009+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1010+
// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00]
1011+
v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3
1012+
1013+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1014+
// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00]
1015+
v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3
1016+
1017+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1018+
// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00]
1019+
v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 op_sel:[1,0,0]
1020+
1021+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1022+
// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00]
1023+
v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0]
1024+
1025+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1026+
// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00]
1027+
v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0]

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,4 +244,52 @@ v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 mul:2
244244
v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 div:2
245245

246246
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
247-
v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 clamp div:2
247+
v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 clamp div:2
248+
249+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
250+
v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 clamp
251+
252+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
253+
v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 mul:2
254+
255+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
256+
v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 div:2
257+
258+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
259+
v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 clamp div:2
260+
261+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
262+
v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 clamp
263+
264+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
265+
v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 mul:2
266+
267+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
268+
v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 div:2
269+
270+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
271+
v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 clamp div:2
272+
273+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
274+
v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 clamp
275+
276+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
277+
v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 mul:2
278+
279+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
280+
v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 div:2
281+
282+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
283+
v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 clamp div:2
284+
285+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
286+
v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 clamp
287+
288+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
289+
v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 mul:2
290+
291+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
292+
v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 div:2
293+
294+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
295+
v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 clamp div:2

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,3 +641,75 @@
641641

642642
# GFX950: v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00]
643643
0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00
644+
645+
# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00]
646+
0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00
647+
648+
# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00]
649+
0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00
650+
651+
# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00]
652+
0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00
653+
654+
# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00]
655+
0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00
656+
657+
# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00]
658+
0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00
659+
660+
# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00]
661+
0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00
662+
663+
# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00]
664+
0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00
665+
666+
# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00]
667+
0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00
668+
669+
# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00]
670+
0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00
671+
672+
# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00]
673+
0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00
674+
675+
# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00]
676+
0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00
677+
678+
# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00]
679+
0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00
680+
681+
# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00]
682+
0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00
683+
684+
# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00]
685+
0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00
686+
687+
# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00]
688+
0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00
689+
690+
# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00]
691+
0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00
692+
693+
# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00]
694+
0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00
695+
696+
# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00]
697+
0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00
698+
699+
# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00]
700+
0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00
701+
702+
# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00]
703+
0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00
704+
705+
# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00]
706+
0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00
707+
708+
# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00]
709+
0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00
710+
711+
# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00]
712+
0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00
713+
714+
# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00]
715+
0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00

0 commit comments

Comments
 (0)