@@ -167,8 +167,8 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
167
167
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
168
168
// Two-operand minimum/maximum definitions, each bound to DivergentBinFrag<fminimum> or
// DivergentBinFrag<fmaximum>.
// Changed in this hunk: the F16 pair moves from VOP3Inst with VOP3_Profile<VOP_F16_F16_F16>
// to VOP3Inst_t16, which is handed the bare VOP_F16_F16_F16 profile. The F32 pair, the
// mnemonics and the pattern fragments are unchanged.
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
169
169
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
170
- defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile< VOP_F16_F16_F16> , DivergentBinFrag<fminimum>>;
171
- defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile< VOP_F16_F16_F16> , DivergentBinFrag<fmaximum>>;
170
+ defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
171
+ defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
172
172
173
173
let SchedRW = [WriteDoubleAdd] in {
174
174
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
@@ -208,7 +208,11 @@ defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>,
208
208
// Three-source definitions using the VOP_I32_I32_I32_I32 profile, each bound to its own
// selection node or intrinsic.
// Changed in this hunk: V_ALIGNBIT_B32 moves from a single-profile VOP3Inst to
// VOP3Inst_t16_with_profiles, which receives three profiles: the original
// VOP3_Profile<VOP_I32_I32_I32_I32>, plus a VOP3_Profile_True16 and a VOP3_Profile_Fake16
// that are both built on VOP_I32_I32_I32_I16 with VOP3_OPSEL. The trailing arguments are
// fshr and null_frag.
// NOTE(review): presumably fshr remains the pattern for the 32-bit-profile variant and
// null_frag means "no pattern" for the 16-bit variants -- confirm against the definition
// of VOP3Inst_t16_with_profiles, which is not visible here.
defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
209
209
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
210
210
defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
211
- defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
211
+
212
+ defm V_ALIGNBIT_B32 : VOP3Inst_t16_with_profiles <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>,
213
+ VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>, VOP3_Profile_Fake16<VOP_I32_I32_I32_I16, VOP3_OPSEL>,
214
+ fshr, null_frag>;
215
+
212
216
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
213
217
214
218
// XXX - No FPException seems suspect but manual doesn't say it does
@@ -573,16 +577,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
573
577
getAsmVOP3OpSel<3, HasClamp, HasOMod,
574
578
HasSrc0FloatMods, HasSrc1FloatMods,
575
579
HasSrc2FloatMods>.ret);
576
- let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
577
- getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
578
- HasOMod, 0, 1, HasSrc0FloatMods,
579
- HasSrc1FloatMods,
580
- HasSrc2FloatMods>.ret>.ret);
581
- let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
582
- getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
583
- HasOMod, 0, 1, HasSrc0FloatMods,
584
- HasSrc1FloatMods,
585
- HasSrc2FloatMods>.ret>.ret);
580
+ let AsmVOP3Base = !subst(", $src2_modifiers", "",
581
+ getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
582
+ HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
583
+ HasModifiers, DstVT>.ret);
586
584
}
587
585
588
586
class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
@@ -636,8 +634,8 @@ defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3
636
634
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
637
635
638
636
// Three-operand F16 minimum/maximum, defined under isGFX12Plus with ReadsModeReg = 0.
// Changed in this hunk: both definitions move from VOP3Inst with
// VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL> to VOP3Inst_t16 given the bare
// VOP_F16_F16_F16_F16 profile; the AMDGPUfminimum3/AMDGPUfmaximum3 patterns are kept.
// NOTE(review): VOP3_OPSEL is no longer spelled at the use site; presumably VOP3Inst_t16
// applies it internally -- confirm against its definition, which is not visible here.
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
639
- defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> , AMDGPUfminimum3>;
640
- defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> , AMDGPUfmaximum3>;
637
+ defm V_MINIMUM3_F16 : VOP3Inst_t16 <"v_minimum3_f16", VOP_F16_F16_F16_F16, AMDGPUfminimum3>;
638
+ defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
641
639
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
642
640
643
641
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
@@ -940,8 +938,8 @@ let SubtargetPredicate = isGFX11Plus in {
940
938
// Fused maximum-minimum / minimum-maximum definitions, under isGFX12Plus with
// ReadsModeReg = 0. None of the four passes a selection pattern.
// Changed in this hunk: the F16 pair moves from VOP3Inst with
// VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL> to VOP3Inst_t16 given the bare
// VOP_F16_F16_F16_F16 profile. The F32 pair is unchanged.
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
941
939
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
942
940
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
943
- defm V_MAXIMUMMINIMUM_F16 : VOP3Inst <"v_maximumminimum_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> >;
944
- defm V_MINIMUMMAXIMUM_F16 : VOP3Inst <"v_minimummaximum_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> >;
941
+ defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16 <"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
942
+ defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16 <"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
945
943
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
946
944
947
945
let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
@@ -1046,8 +1044,8 @@ defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16",
1046
1044
// gfx12 instantiations keyed by a hexadecimal first argument (0x22c .. 0x268 in this hunk).
// Changed in this hunk: V_MINIMUM3_F16 and V_MAXIMUM3_F16 move from
// VOP3Only_Realtriple_t16_gfx12<op> to VOP3_Realtriple_t16_and_f16_gfx12<op, asmName>,
// now passing the assembly mnemonic string explicitly. The numeric values 0x22f and 0x230
// are unchanged.
defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">;
1047
1045
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
1048
1046
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
1049
- defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12 <0x22f>;
1050
- defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12 <0x230>;
1047
+ defm V_MINIMUM3_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x22f, "v_minimum3_f16" >;
1048
+ defm V_MAXIMUM3_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x230, "v_maximum3_f16" >;
1051
1049
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
1052
1050
defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">;
1053
1051
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
@@ -1056,8 +1054,8 @@ defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16
1056
1054
// gfx12 instantiations keyed by a hexadecimal first argument (0x26b .. 0x282 in this hunk).
// Changed in this hunk: V_MINIMUMMAXIMUM_F16 and V_MAXIMUMMINIMUM_F16 move from
// VOP3Only_Realtriple_t16_gfx12<op> to VOP3_Realtriple_t16_and_f16_gfx12<op, asmName>,
// now passing the assembly mnemonic string explicitly. The numeric values 0x26e and 0x26f
// are unchanged.
defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">;
1057
1055
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
1058
1056
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
1059
- defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x26e>;
1060
- defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x26f>;
1057
+ defm V_MINIMUMMAXIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x26e, "v_minimummaximum_f16" >;
1058
+ defm V_MAXIMUMMINIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x26f, "v_maximumminimum_f16" >;
1061
1059
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
1062
1060
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
1063
1061
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
@@ -1074,8 +1072,8 @@ defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
1074
1072
// gfx12 instantiations keyed by a hexadecimal first argument.
// Changed in this hunk: V_MINIMUM_F16 and V_MAXIMUM_F16 move from
// VOP3Only_Realtriple_t16_gfx12<op> to VOP3_Realtriple_t16_and_f16_gfx12<op, asmName>,
// now passing the assembly mnemonic string explicitly. The numeric values 0x367 and 0x368
// are unchanged.
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
1075
1073
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
1076
1074
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
1077
- defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x367>;
1078
- defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x368>;
1075
+ defm V_MINIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x367, "v_minimum_f16" >;
1076
+ defm V_MAXIMUM_F16 : VOP3_Realtriple_t16_and_f16_gfx12 <0x368, "v_maximum_f16" >;
1079
1077
1080
1078
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
1081
1079
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
@@ -1108,6 +1106,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
1108
1106
// Convenience multiclass: inherits VOP3Dot_Realtriple once for GFX11Gen and once for
// GFX12Gen, passing the same 10-bit `op` value to both.
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
1109
1107
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
1110
1108
1109
// Added by this hunk. Inherits VOP3_Realtriple_with_name once for GFX11Gen and once for
// GFX12Gen, forwarding identical arguments to both.
//   op              - 10-bit value passed through unchanged.
//   asmName         - assembly mnemonic string passed through unchanged.
//   opName          - defaults to NAME (the name of the enclosing defm).
//   pseudo_mnemonic - defaults to the empty string.
//   isSingle        - defaults to 0.
+ multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1110
+ string pseudo_mnemonic = "", bit isSingle = 0> :
1111
+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
1112
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
1113
+
1114
// Added by this hunk. Instantiates VOP3_Realtriple_t16_gfx11_gfx12 twice: once under the
// name opName#"_t16" and once under opName#"_fake16". Both instantiations receive the
// same op, asmName, pseudo_mnemonic and isSingle; only the opName suffix differs.
// opName defaults to NAME, pseudo_mnemonic to "", isSingle to 0.
+ multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1115
+ string pseudo_mnemonic = "", bit isSingle = 0> {
1116
+ defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1117
+ defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1118
+ }
1119
+
1111
1120
// Convenience multiclass: inherits VOP3be_Real once for GFX11Gen and once for GFX12Gen,
// forwarding the same op, opName and asmName to both.
multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
1112
1121
VOP3be_Real<GFX11Gen, op, opName, asmName>,
1113
1122
VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1128,7 +1137,7 @@ defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>;
1128
1137
// Instantiations keyed by a hexadecimal first argument (0x213 .. 0x219 in this hunk),
// most of them through *_gfx11_gfx12 multiclasses.
// Changed in this hunk: V_ALIGNBIT_B32 moves from VOP3_Realtriple_gfx11_gfx12<0x216> to
// VOP3_Realtriple_t16_and_f16_gfx11_gfx12<0x216, "v_alignbit_b32">, now passing the
// assembly mnemonic string explicitly. The numeric value 0x216 is unchanged.
defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
1129
1138
defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>;
1130
1139
defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
1131
- defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12 <0x216>;
1140
+ defm V_ALIGNBIT_B32 : VOP3_Realtriple_t16_and_f16_gfx11_gfx12 <0x216, "v_alignbit_b32" >;
1132
1141
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>;
1133
1142
defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>;
1134
1143
defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>;
0 commit comments