Skip to content

Commit 536f8ea

Browse files
committed
true16 for minimummaximum/maximumminimum/maximum/minimum
1 parent 1a8f49f commit 536f8ea

File tree

8 files changed

+1953
-745
lines changed

8 files changed

+1953
-745
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
170170
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
171171
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
172172
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
173-
defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fminimum>>;
174-
defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fmaximum>>;
173+
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
174+
defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
175175

176176
let SchedRW = [WriteDoubleAdd] in {
177177
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
@@ -634,8 +634,8 @@ defm V_MAX3_I16 : VOP3Inst_t16 <"v_max3_i16", VOP_I16_I16_I16_I16, AMDGPUsmax3>;
634634
defm V_MAX3_U16 : VOP3Inst_t16 <"v_max3_u16", VOP_I16_I16_I16_I16, AMDGPUumax3>;
635635

636636
let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
637-
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
638-
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
637+
defm V_MINIMUM3_F16 : VOP3Inst_t16 <"v_minimum3_f16", VOP_F16_F16_F16_F16, AMDGPUfminimum3>;
638+
defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
639639
} // End SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0
640640

641641
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
@@ -1440,8 +1440,8 @@ let SubtargetPredicate = HasF32ToF16BF16ConversionSRInsts in {
14401440
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
14411441
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
14421442
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
1443-
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst<"v_maximumminimum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
1444-
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
1443+
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
1444+
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16<"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
14451445
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
14461446

14471447
let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
@@ -1591,8 +1591,8 @@ defm V_MIN3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x22b, "v_min3_
15911591
defm V_MAX3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x22c, "v_max3_num_f16", "V_MAX3_F16", "v_max3_f16">;
15921592
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
15931593
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
1594-
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
1595-
defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>;
1594+
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x22f, "v_minimum3_f16">;
1595+
defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x230, "v_maximum3_f16">;
15961596
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
15971597
defm V_MED3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x232, "v_med3_num_f16", "V_MED3_F16", "v_med3_f16">;
15981598
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
@@ -1601,8 +1601,8 @@ defm V_MINMAX_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x26a, "v_minma
16011601
defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x26b, "v_maxmin_num_f16", "V_MAXMIN_F16", "v_maxmin_f16">;
16021602
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
16031603
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
1604-
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
1605-
defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>;
1604+
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x26e, "v_minimummaximum_f16">;
1605+
defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x26f, "v_maximumminimum_f16">;
16061606
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
16071607
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
16081608
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
@@ -1619,8 +1619,8 @@ defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
16191619
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
16201620
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
16211621
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
1622-
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>;
1623-
defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
1622+
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">;
1623+
defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_maximum_f16">;
16241624

16251625
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
16261626
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1947,9 +1947,6 @@ multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
19471947
multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
19481948
VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;
19491949

1950-
multiclass VOP3Only_Realtriple_t16_gfx12<bits<10> op> :
1951-
VOP3Only_Realtriple<GFX12Gen, op>;
1952-
19531950
multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
19541951
string pseudo_mnemonic = "", bit isSingle = 0> :
19551952
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
@@ -1960,6 +1957,16 @@ multiclass VOP3_Realtriple_t16_and_fake16_gfx12<bits<10> op, string asmName, str
19601957
defm _fake16:VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
19611958
}
19621959

1960+
// VOP3-only wrapper around VOP3_Realtriple_t16_gfx12. It forwards op, asmName,
// opName (defaulting to NAME) and pseudo_mnemonic unchanged, and always passes
// 1 as the trailing argument, which is that multiclass's `isSingle` parameter.
// Unlike VOP3_Realtriple_t16_gfx12, callers cannot override isSingle here.
multiclass VOP3Only_Realtriple_t16_gfx12<bits<10> op, string asmName,
1961+
string opName = NAME, string pseudo_mnemonic = "">
1962+
: VOP3_Realtriple_t16_gfx12<op, asmName, opName, pseudo_mnemonic, 1>;
1963+
1964+
// Instantiates VOP3Only_Realtriple_t16_gfx12 twice for a single opcode: once
// under the `_t16` suffix and once under the `_fake16` suffix. Both
// instantiations share op, asmName and pseudo_mnemonic; only the opName passed
// down differs, with "_t16" or "_fake16" appended to it respectively.
// opName defaults to NAME, i.e. the name given at the defm site.
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx12<bits<10> op, string asmName,
1965+
string opName = NAME, string pseudo_mnemonic = ""> {
1966+
defm _t16 : VOP3Only_Realtriple_t16_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic>;
1967+
defm _fake16 : VOP3Only_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic>;
1968+
}
1969+
19631970
multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
19641971
string asmName, bit isSingle = 0> {
19651972
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");

0 commit comments

Comments
 (0)