Skip to content

Commit 365136e

Browse files
arsenmshiltian
authored andcommitted
AMDGPU: Add support for v_cvt_scalef32_sr instructions (llvm#117820)
Co-authored-by: Shilei Tian <[email protected]>
1 parent 59574d9 commit 365136e

File tree

11 files changed

+820
-19
lines changed

11 files changed

+820
-19
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,5 +600,12 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f16, "UiUiV2hUifIi", "nc"
600600
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_bf16, "UiUiV2yUifIi", "nc", "fp4-cvt-scale-insts")
601601
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32, "UiUiV2fUifIi", "nc", "fp4-cvt-scale-insts")
602602

603+
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
604+
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f16, "V6UiV32hUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
605+
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
606+
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
607+
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16, "V6UiV32hUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
608+
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
609+
603610
#undef BUILTIN
604611
#undef TARGET_BUILTIN

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,3 +1338,65 @@ void test_cvt_scalef32_sr_pk_fp4_f32(global unsigned *out, float2 src, uint seed
13381338
*out = __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32(*out, src, seed, scale, 2);
13391339
*out = __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32(*out, src, seed, scale, 3);
13401340
}
1341+
1342+
// CHECK-LABEL: @test_cvt_scalef32_sr_pk32(
1343+
// CHECK-NEXT: entry:
1344+
// CHECK-NEXT: [[OUT6_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1345+
// CHECK-NEXT: [[SRCBF32_ADDR:%.*]] = alloca <32 x bfloat>, align 64, addrspace(5)
1346+
// CHECK-NEXT: [[SRCH32_ADDR:%.*]] = alloca <32 x half>, align 64, addrspace(5)
1347+
// CHECK-NEXT: [[SRCF32_ADDR:%.*]] = alloca <32 x float>, align 128, addrspace(5)
1348+
// CHECK-NEXT: [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1349+
// CHECK-NEXT: [[SRC2_ADDR:%.*]] = alloca float, align 4, addrspace(5)
1350+
// CHECK-NEXT: store ptr addrspace(1) [[OUT6:%.*]], ptr addrspace(5) [[OUT6_ADDR]], align 8
1351+
// CHECK-NEXT: store <32 x bfloat> [[SRCBF32:%.*]], ptr addrspace(5) [[SRCBF32_ADDR]], align 64
1352+
// CHECK-NEXT: store <32 x half> [[SRCH32:%.*]], ptr addrspace(5) [[SRCH32_ADDR]], align 64
1353+
// CHECK-NEXT: store <32 x float> [[SRCF32:%.*]], ptr addrspace(5) [[SRCF32_ADDR]], align 128
1354+
// CHECK-NEXT: store i32 [[SRC1:%.*]], ptr addrspace(5) [[SRC1_ADDR]], align 4
1355+
// CHECK-NEXT: store float [[SRC2:%.*]], ptr addrspace(5) [[SRC2_ADDR]], align 4
1356+
// CHECK-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr addrspace(5) [[SRCBF32_ADDR]], align 64
1357+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
1358+
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
1359+
// CHECK-NEXT: [[TMP3:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.bf16(<32 x bfloat> [[TMP0]], i32 [[TMP1]], float [[TMP2]])
1360+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
1361+
// CHECK-NEXT: store <6 x i32> [[TMP3]], ptr addrspace(1) [[TMP4]], align 32
1362+
// CHECK-NEXT: [[TMP5:%.*]] = load <32 x half>, ptr addrspace(5) [[SRCH32_ADDR]], align 64
1363+
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
1364+
// CHECK-NEXT: [[TMP7:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
1365+
// CHECK-NEXT: [[TMP8:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f16(<32 x half> [[TMP5]], i32 [[TMP6]], float [[TMP7]])
1366+
// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
1367+
// CHECK-NEXT: store <6 x i32> [[TMP8]], ptr addrspace(1) [[TMP9]], align 32
1368+
// CHECK-NEXT: [[TMP10:%.*]] = load <32 x float>, ptr addrspace(5) [[SRCF32_ADDR]], align 128
1369+
// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
1370+
// CHECK-NEXT: [[TMP12:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
1371+
// CHECK-NEXT: [[TMP13:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f32(<32 x float> [[TMP10]], i32 [[TMP11]], float [[TMP12]])
1372+
// CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
1373+
// CHECK-NEXT: store <6 x i32> [[TMP13]], ptr addrspace(1) [[TMP14]], align 32
1374+
// CHECK-NEXT: [[TMP15:%.*]] = load <32 x bfloat>, ptr addrspace(5) [[SRCBF32_ADDR]], align 64
1375+
// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
1376+
// CHECK-NEXT: [[TMP17:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
1377+
// CHECK-NEXT: [[TMP18:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.bf16(<32 x bfloat> [[TMP15]], i32 [[TMP16]], float [[TMP17]])
1378+
// CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
1379+
// CHECK-NEXT: store <6 x i32> [[TMP18]], ptr addrspace(1) [[TMP19]], align 32
1380+
// CHECK-NEXT: [[TMP20:%.*]] = load <32 x half>, ptr addrspace(5) [[SRCH32_ADDR]], align 64
1381+
// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
1382+
// CHECK-NEXT: [[TMP22:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
1383+
// CHECK-NEXT: [[TMP23:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f16(<32 x half> [[TMP20]], i32 [[TMP21]], float [[TMP22]])
1384+
// CHECK-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
1385+
// CHECK-NEXT: store <6 x i32> [[TMP23]], ptr addrspace(1) [[TMP24]], align 32
1386+
// CHECK-NEXT: [[TMP25:%.*]] = load <32 x float>, ptr addrspace(5) [[SRCF32_ADDR]], align 128
1387+
// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
1388+
// CHECK-NEXT: [[TMP27:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
1389+
// CHECK-NEXT: [[TMP28:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f32(<32 x float> [[TMP25]], i32 [[TMP26]], float [[TMP27]])
1390+
// CHECK-NEXT: [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
1391+
// CHECK-NEXT: store <6 x i32> [[TMP28]], ptr addrspace(1) [[TMP29]], align 32
1392+
// CHECK-NEXT: ret void
1393+
//
1394+
void test_cvt_scalef32_sr_pk32(global uint6 *out6, bfloat32 srcbf32, half32 srch32, float32 srcf32, unsigned src1, float src2)
1395+
{
1396+
*out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16(srcbf32, src1, src2);
1397+
*out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f16(srch32, src1, src2);
1398+
*out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32(srcf32, src1, src2);
1399+
*out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16(srcbf32, src1, src2);
1400+
*out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16(srch32, src1, src2);
1401+
*out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32(srcf32, src1, src2);
1402+
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,13 +569,24 @@ class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMTy
569569
[DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
570570
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
571571

572+
class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
573+
[DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
574+
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
575+
572576
def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_fp6_f16">;
573577
def int_amdgcn_cvt_scalef32_pk32_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_bf6_f16">;
574578
def int_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_fp6_bf16">;
575579
def int_amdgcn_cvt_scalef32_pk32_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_bf6_bf16">;
576580
def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_fp6_f32">;
577581
def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_bf6_f32">;
578582

583+
def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">;
584+
def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">;
585+
def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_bf6_f32">;
586+
def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_fp6_bf16">;
587+
def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">;
588+
def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">;
589+
579590
class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
580591
[DstTy],
581592
[llvm_i32_ty, // src

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4555,6 +4555,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
45554555
case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:
45564556
case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:
45574557
case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:
4558+
case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:
4559+
case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:
4560+
case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:
4561+
case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:
4562+
case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:
4563+
case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:
45584564
case Intrinsic::amdgcn_ashr_pk_i8_i32:
45594565
case Intrinsic::amdgcn_ashr_pk_u8_i32:
45604566
case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,25 +1639,27 @@ class getSDWASrcForVT <ValueType VT> {
16391639
// given VT.
16401640
class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
16411641
RegisterOperand ret =
1642-
!cond(!eq(VT, f64) : VSrc_f64,
1643-
!eq(VT, f32) : VSrc_f32,
1644-
!eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16),
1645-
!eq(VT, bf16) : !if(IsTrue16, VSrcT_bf16, VSrc_bf16),
1646-
!eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16),
1647-
!eq(VT, i1) : SSrc_i1,
1648-
!eq(VT, v2f32) : VSrc_v2f32,
1649-
!eq(VT, v2i32) : VSrc_v2b32,
1650-
!eq(VT, v2f16) : VSrc_v2f16,
1651-
!eq(VT, v2bf16) : VSrc_v2bf16,
1652-
!eq(VT, v2i16) : VSrc_v2b16,
1653-
!eq(VT, v4f16) : AVSrc_64,
1654-
!eq(VT, v4bf16) : AVSrc_64,
1655-
!eq(VT.Size, 512) : VRegSrc_512,
1656-
!eq(VT.Size, 192) : VRegSrc_192,
1657-
!eq(VT.Size, 128) : VRegSrc_128,
1658-
!eq(VT.Size, 96) : VRegSrc_96,
1659-
!eq(VT.Size, 64) : VSrc_b64,
1660-
1 : VSrc_b32);
1642+
!cond(!eq(VT, f64) : VSrc_f64,
1643+
!eq(VT, f32) : VSrc_f32,
1644+
!eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16),
1645+
!eq(VT, bf16) : !if(IsTrue16, VSrcT_bf16, VSrc_bf16),
1646+
!eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16),
1647+
!eq(VT, i1) : SSrc_i1,
1648+
!eq(VT, v2f32) : VSrc_v2f32,
1649+
!eq(VT, v2i32) : VSrc_v2b32,
1650+
!eq(VT, v2f16) : VSrc_v2f16,
1651+
!eq(VT, v2bf16) : VSrc_v2bf16,
1652+
!eq(VT, v2i16) : VSrc_v2b16,
1653+
!eq(VT, v4f16) : AVSrc_64,
1654+
!eq(VT, v4bf16) : AVSrc_64,
1655+
!eq(VT.Size, 1024) : VRegSrc_1024,
1656+
!eq(VT.Size, 512) : VRegSrc_512,
1657+
!eq(VT.Size, 256) : VRegSrc_256,
1658+
!eq(VT.Size, 192) : VRegSrc_192,
1659+
!eq(VT.Size, 128) : VRegSrc_128,
1660+
!eq(VT.Size, 96) : VRegSrc_96,
1661+
!eq(VT.Size, 64) : VSrc_b64,
1662+
1 : VSrc_b32);
16611663
}
16621664

16631665
// Src2 of VOP3 DPP instructions cannot be a literal
@@ -2663,6 +2665,10 @@ def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>;
26632665
def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>;
26642666
def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>;
26652667

2668+
def VOP_V6I32_V32BF16_I32_F32 : VOPProfile<[v6i32, v32bf16, i32, f32]>;
2669+
def VOP_V6I32_V32F16_I32_F32 : VOPProfile<[v6i32, v32f16, i32, f32]>;
2670+
def VOP_V6I32_V32F32_I32_F32 : VOPProfile<[v6i32, v32f32, i32, f32]>;
2671+
26662672
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
26672673
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
26682674
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,7 @@ def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
12531253
def VRegSrc_192: SrcReg9<VReg_192, "OPW192">;
12541254
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
12551255
def VRegSrc_512: SrcReg9<VReg_512, "OPW512">;
1256+
def VRegSrc_1024: SrcReg9<VReg_1024, "OPW1024">;
12561257
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;
12571258

12581259
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,12 @@ let SubtargetPredicate = HasF16BF16ToFP6BF6ConversionScaleInsts, mayRaiseFPExcep
11201120
defm V_CVT_SCALEF32_PK32_BF6_F16 : VOP3Inst<"v_cvt_scalef32_pk32_bf6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_F32>, int_amdgcn_cvt_scalef32_pk32_bf6_f16>;
11211121
defm V_CVT_SCALEF32_PK32_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_pk32_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_F32>, int_amdgcn_cvt_scalef32_pk32_fp6_bf16>;
11221122
defm V_CVT_SCALEF32_PK32_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_pk32_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_F32>, int_amdgcn_cvt_scalef32_pk32_bf6_bf16>;
1123+
defm V_CVT_SCALEF32_SR_PK32_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16>;
1124+
defm V_CVT_SCALEF32_SR_PK32_BF6_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16>;
1125+
defm V_CVT_SCALEF32_SR_PK32_BF6_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32>;
1126+
defm V_CVT_SCALEF32_SR_PK32_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16>;
1127+
defm V_CVT_SCALEF32_SR_PK32_FP6_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16>;
1128+
defm V_CVT_SCALEF32_SR_PK32_FP6_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32>;
11231129
}
11241130

11251131
let SubtargetPredicate = HasGFX950Insts, mayRaiseFPException = 0 in {
@@ -2183,6 +2189,12 @@ defm V_CVT_SCALEF32_PK32_FP6_F16 : VOP3_Real_gfx9<0x258, "v_cvt_scalef32_pk32_f
21832189
defm V_CVT_SCALEF32_PK32_FP6_BF16 : VOP3_Real_gfx9<0x259, "v_cvt_scalef32_pk32_fp6_bf16">;
21842190
defm V_CVT_SCALEF32_PK32_BF6_F16 : VOP3_Real_gfx9<0x25a, "v_cvt_scalef32_pk32_bf6_f16">;
21852191
defm V_CVT_SCALEF32_PK32_BF6_BF16 : VOP3_Real_gfx9<0x25b, "v_cvt_scalef32_pk32_bf6_bf16">;
2192+
defm V_CVT_SCALEF32_SR_PK32_BF6_BF16 : VOP3_Real_gfx9<0x25f, "v_cvt_scalef32_sr_pk32_bf6_bf16">;
2193+
defm V_CVT_SCALEF32_SR_PK32_BF6_F16 : VOP3_Real_gfx9<0x25e, "v_cvt_scalef32_sr_pk32_bf6_f16">;
2194+
defm V_CVT_SCALEF32_SR_PK32_BF6_F32 : VOP3_Real_gfx9<0x255, "v_cvt_scalef32_sr_pk32_bf6_f32">;
2195+
defm V_CVT_SCALEF32_SR_PK32_FP6_BF16 : VOP3_Real_gfx9<0x25d, "v_cvt_scalef32_sr_pk32_fp6_bf16">;
2196+
defm V_CVT_SCALEF32_SR_PK32_FP6_F16 : VOP3_Real_gfx9<0x25c, "v_cvt_scalef32_sr_pk32_fp6_f16">;
2197+
defm V_CVT_SCALEF32_SR_PK32_FP6_F32 : VOP3_Real_gfx9<0x254, "v_cvt_scalef32_sr_pk32_fp6_f32">;
21862198
}
21872199

21882200
let OtherPredicates = [HasF32ToF16BF16ConversionSRInsts] in {

0 commit comments

Comments
 (0)