Skip to content

Commit ef0e27d

Browse files
committed
[AMDGPU][True16][MC] VOP3 profile in True16 format
1 parent c0b1c62 commit ef0e27d

File tree

8 files changed

+654
-340
lines changed

8 files changed

+654
-340
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5464,8 +5464,12 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
54645464
case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
54655465
case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
54665466
case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5467-
case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
5468-
case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
5467+
case AMDGPU::S_MINIMUM_F16:
5468+
return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5469+
: AMDGPU::V_MINIMUM_F16_fake16_e64;
5470+
case AMDGPU::S_MAXIMUM_F16:
5471+
return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5472+
: AMDGPU::V_MAXIMUM_F16_fake16_e64;
54695473
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
54705474
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
54715475
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
@@ -7358,9 +7362,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
73587362
return;
73597363
}
73607364
case AMDGPU::S_MINIMUM_F32:
7361-
case AMDGPU::S_MAXIMUM_F32:
7362-
case AMDGPU::S_MINIMUM_F16:
7363-
case AMDGPU::S_MAXIMUM_F16: {
7365+
case AMDGPU::S_MAXIMUM_F32: {
73647366
const DebugLoc &DL = Inst.getDebugLoc();
73657367
Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
73667368
MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
@@ -7371,7 +7373,28 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
73717373
.addImm(0) // clamp
73727374
.addImm(0); // omod
73737375
MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
7374-
7376+
legalizeOperands(*NewInstr, MDT);
7377+
addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
7378+
Inst.eraseFromParent();
7379+
return;
7380+
}
7381+
case AMDGPU::S_MINIMUM_F16:
7382+
case AMDGPU::S_MAXIMUM_F16: {
7383+
const DebugLoc &DL = Inst.getDebugLoc();
7384+
Register NewDst;
7385+
if (ST.useRealTrue16Insts())
7386+
NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7387+
else
7388+
NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7389+
MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
7390+
.addImm(0) // src0_modifiers
7391+
.add(Inst.getOperand(1))
7392+
.addImm(0) // src1_modifiers
7393+
.add(Inst.getOperand(2))
7394+
.addImm(0) // clamp
7395+
.addImm(0) // omod
7396+
.addImm(0); // opsel0
7397+
MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
73757398
legalizeOperands(*NewInstr, MDT);
73767399
addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
73777400
Inst.eraseFromParent();

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2237,8 +2237,9 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
22372237
string clamp = !if(HasClamp, "$clamp", "");
22382238
string omod = !if(HasOMod, "$omod", "");
22392239

2240-
string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, "");
2241-
2240+
string ret = dst#!if(!eq(NumSrcArgs,0),
2241+
"",
2242+
!if(HasDst,", ", "")#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod);
22422243
}
22432244

22442245
class getAsmVOP3DPP<string base> {
@@ -2735,6 +2736,7 @@ def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
27352736
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
27362737
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
27372738
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2739+
def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>;
27382740
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
27392741
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
27402742
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2444,6 +2444,7 @@ def : AMDGPUPat <
24442444
$src1), sub1)
24452445
>;
24462446

2447+
let True16Predicate = NotHasTrue16BitInsts in {
24472448
def : ROTRPattern <V_ALIGNBIT_B32_e64>;
24482449

24492450
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
@@ -2453,6 +2454,30 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
24532454
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
24542455
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
24552456
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
2457+
} // end True16Predicate = NotHasTrue16BitInsts
2458+
2459+
let True16Predicate = UseFakeTrue16Insts in {
2460+
def ROTRPattern_fake16 : GCNPat <
2461+
(rotr i32:$src0, i32:$src1),
2462+
(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
2463+
/* src1_modifiers */ 0, $src0,
2464+
/* src2_modifiers */ 0,
2465+
$src1, /* clamp */ 0, /* op_sel */ 0)
2466+
>;
2467+
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
2468+
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
2469+
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
2470+
0, /* src1_modifiers */
2471+
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
2472+
0, /* src2_modifiers */
2473+
$src1, /* clamp */ 0, /* op_sel */ 0)>;
2474+
2475+
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
2476+
(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
2477+
/* src1_modifiers */ 0, $src1,
2478+
/* src2_modifiers */ 0,
2479+
$src2, /* clamp */ 0, /* op_sel */ 0)>;
2480+
} // end True16Predicate = UseFakeTrue16Insts
24562481

24572482
/********** ====================== **********/
24582483
/********** Indirect addressing **********/
@@ -2932,6 +2957,7 @@ def : GCNPat <
29322957
(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
29332958
>;
29342959

2960+
let True16Predicate = NotHasTrue16BitInsts in
29352961
def : GCNPat <
29362962
(i32 (bswap i32:$a)),
29372963
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
@@ -2941,6 +2967,7 @@ def : GCNPat <
29412967

29422968
// FIXME: This should have been narrowed to i32 during legalization.
29432969
// This pattern should also be skipped for GlobalISel
2970+
let True16Predicate = NotHasTrue16BitInsts in
29442971
def : GCNPat <
29452972
(i64 (bswap i64:$a)),
29462973
(REG_SEQUENCE VReg_64,
@@ -3312,6 +3339,7 @@ def : GCNPat <
33123339

33133340
// Take the upper 16 bits from V[0] and the lower 16 bits from V[1]
33143341
// Special case, can use V_ALIGNBIT (always uses encoded literal)
3342+
let True16Predicate = NotHasTrue16BitInsts in
33153343
def : GCNPat <
33163344
(vecTy (DivergentBinFrag<build_vector>
33173345
(Ty !if(!eq(Ty, i16),
@@ -3321,6 +3349,16 @@ def : GCNPat <
33213349
(V_ALIGNBIT_B32_e64 VGPR_32:$b, VGPR_32:$a, (i32 16))
33223350
>;
33233351

3352+
let True16Predicate = UseFakeTrue16Insts in
3353+
def : GCNPat <
3354+
(vecTy (DivergentBinFrag<build_vector>
3355+
(Ty !if(!eq(Ty, i16),
3356+
(Ty (trunc (srl VGPR_32:$a, (i32 16)))),
3357+
(Ty (bitconvert (i16 (trunc (srl VGPR_32:$a, (i32 16)))))))),
3358+
(Ty VGPR_32:$b))),
3359+
(V_ALIGNBIT_B32_fake16_e64 0, VGPR_32:$b, 0, VGPR_32:$a, 0, (i16 16), 0, 0)
3360+
>;
3361+
33243362
// Take the upper 16 bits from each VGPR_32 and concat them
33253363
def : GCNPat <
33263364
(vecTy (DivergentBinFrag<build_vector>
@@ -3682,12 +3720,14 @@ defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax>;
36823720
let OtherPredicates = [isGFX12Plus] in {
36833721
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
36843722
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3685-
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3686-
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
36873723
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
36883724
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3689-
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3690-
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3725+
}
3726+
let True16Predicate = UseFakeTrue16Insts, OtherPredicates = [isGFX12Plus] in {
3727+
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3728+
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
3729+
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
3730+
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
36913731
}
36923732

36933733
// Convert a floating-point power of 2 to the integer exponent.

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,8 +1682,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
16821682
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
16831683

16841684
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName> :
1685-
VOP3Only_Realtriple_t16<GFX11Gen, op, asmName>,
1686-
VOP3Only_Realtriple_t16<GFX12Gen, op, asmName>;
1685+
VOP3_Realtriple_t16_gfx11<op, asmName, NAME, "", 1>,
1686+
VOP3_Realtriple_t16_gfx12<op, asmName, NAME, "", 1>;
16871687

16881688
multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
16891689
VOP3beOnly_Realtriple<GFX11Gen, op>, VOP3beOnly_Realtriple<GFX12Gen, op>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
167167
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
168168
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
169169
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
170-
defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fminimum>>;
171-
defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fmaximum>>;
170+
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
171+
defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
172172

173173
let SchedRW = [WriteDoubleAdd] in {
174174
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
@@ -208,7 +208,11 @@ defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>,
208208
defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
209209
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
210210
defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
211-
defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
211+
212+
defm V_ALIGNBIT_B32 : VOP3Inst_t16_with_profiles <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>,
213+
VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>, VOP3_Profile_Fake16<VOP_I32_I32_I32_I16, VOP3_OPSEL>,
214+
fshr, null_frag>;
215+
212216
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
213217

214218
// XXX - No FPException seems suspect but manual doesn't say it does
@@ -573,16 +577,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
573577
getAsmVOP3OpSel<3, HasClamp, HasOMod,
574578
HasSrc0FloatMods, HasSrc1FloatMods,
575579
HasSrc2FloatMods>.ret);
576-
let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
577-
getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
578-
HasOMod, 0, 1, HasSrc0FloatMods,
579-
HasSrc1FloatMods,
580-
HasSrc2FloatMods>.ret>.ret);
581-
let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
582-
getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
583-
HasOMod, 0, 1, HasSrc0FloatMods,
584-
HasSrc1FloatMods,
585-
HasSrc2FloatMods>.ret>.ret);
580+
let AsmVOP3Base = !subst(", $src2_modifiers", "",
581+
getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
582+
HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
583+
HasModifiers, DstVT>.ret);
586584
}
587585

588586
class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
@@ -636,8 +634,8 @@ defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3
636634
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
637635

638636
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
639-
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
640-
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
637+
defm V_MINIMUM3_F16 : VOP3Inst_t16 <"v_minimum3_f16", VOP_F16_F16_F16_F16, AMDGPUfminimum3>;
638+
defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
641639
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
642640

643641
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
@@ -940,8 +938,8 @@ let SubtargetPredicate = isGFX11Plus in {
940938
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
941939
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
942940
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
943-
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst<"v_maximumminimum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
944-
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
941+
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
942+
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16<"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
945943
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
946944

947945
let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
@@ -1046,8 +1044,8 @@ defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16",
10461044
defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">;
10471045
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
10481046
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
1049-
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
1050-
defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>;
1047+
defm V_MINIMUM3_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x22f, "v_minimum3_f16">;
1048+
defm V_MAXIMUM3_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x230, "v_maximum3_f16">;
10511049
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
10521050
defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">;
10531051
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
@@ -1056,8 +1054,8 @@ defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16
10561054
defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">;
10571055
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
10581056
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
1059-
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
1060-
defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>;
1057+
defm V_MINIMUMMAXIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x26e, "v_minimummaximum_f16">;
1058+
defm V_MAXIMUMMINIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x26f, "v_maximumminimum_f16">;
10611059
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
10621060
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
10631061
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
@@ -1074,8 +1072,8 @@ defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
10741072
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
10751073
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
10761074
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
1077-
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>;
1078-
defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
1075+
defm V_MINIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x367, "v_minimum_f16">;
1076+
defm V_MAXIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12<0x368, "v_maximum_f16">;
10791077

10801078
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
10811079
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
@@ -1108,6 +1106,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
11081106
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
11091107
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
11101108

1109+
multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1110+
string pseudo_mnemonic = "", bit isSingle = 0> :
1111+
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
1112+
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
1113+
1114+
multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1115+
string pseudo_mnemonic = "", bit isSingle = 0> {
1116+
defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1117+
defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1118+
}
1119+
11111120
multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
11121121
VOP3be_Real<GFX11Gen, op, opName, asmName>,
11131122
VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1128,7 +1137,7 @@ defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>;
11281137
defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
11291138
defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>;
11301139
defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
1131-
defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12<0x216>;
1140+
defm V_ALIGNBIT_B32 : VOP3_Realtriple_t16_and_f16_gfx11_gfx12<0x216, "v_alignbit_b32">;
11321141
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>;
11331142
defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>;
11341143
defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>;

0 commit comments

Comments
 (0)