@@ -74,6 +74,7 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
74
74
75
75
// copy relevant pseudo op flags
76
76
let SubtargetPredicate = ps.SubtargetPredicate;
77
+ let OtherPredicates = ps.OtherPredicates;
77
78
let AsmMatchConverter = ps.AsmMatchConverter;
78
79
let AsmVariantName = ps.AsmVariantName;
79
80
let Constraints = ps.Constraints;
@@ -157,8 +158,11 @@ multiclass VOP1Inst_t16<string opName,
157
158
let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in {
158
159
defm NAME : VOP1Inst<opName, P, node>;
159
160
}
160
- let OtherPredicates = [HasTrue16BitInsts] in {
161
- defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
161
+ let OtherPredicates = [UseRealTrue16Insts] in {
162
+ defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
163
+ }
164
+ let OtherPredicates = [UseFakeTrue16Insts] in {
165
+ defm _fake16 : VOP1Inst<opName#"_fake16", VOPProfile_Fake16<P>, node>;
162
166
}
163
167
}
164
168
@@ -679,6 +683,7 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1
679
683
let SchedRW = ps.SchedRW;
680
684
let Uses = ps.Uses;
681
685
let TRANS = ps.TRANS;
686
+ let OtherPredicates = ps.OtherPredicates;
682
687
683
688
bits<8> vdst;
684
689
let Inst{8-0} = 0xfa;
@@ -707,6 +712,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
707
712
let Defs = ps.Defs;
708
713
let SchedRW = ps.SchedRW;
709
714
let Uses = ps.Uses;
715
+ let OtherPredicates = ps.OtherPredicates;
710
716
711
717
bits<8> vdst;
712
718
let Inst{8-0} = fi;
@@ -742,7 +748,9 @@ multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> {
742
748
// Instantiates VOP1_Real_e32<Gen, op, opName> under NAME with an overridden
// assembly string.
//   Gen     - GFXGen record; its DecoderNamespace field is read below.
//   op      - 9-bit opcode forwarded unchanged to VOP1_Real_e32.
//   opName  - base name; the pseudo is looked up as opName#"_e32".
//   asmName - mnemonic text prepended to the pseudo's AsmOperands.
// In the updated ('+') form of the let, DecoderNamespace is also overridden:
// it is Gen.DecoderNamespace, with "_FAKE16" appended unless the pseudo's
// profile has IsRealTrue16 set.
multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
743
749
string asmName> {
744
750
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
745
- let AsmString = asmName # ps.AsmOperands in {
751
+ let AsmString = asmName # ps.AsmOperands,
752
+ DecoderNamespace = Gen.DecoderNamespace #
753
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
746
754
defm NAME : VOP1_Real_e32<Gen, op, opName>;
747
755
}
748
756
}
@@ -761,7 +769,9 @@ multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> {
761
769
// Instantiates VOP1_Real_dpp<Gen, op, opName> under NAME with an overridden
// assembly string.
//   Gen     - GFXGen record; its DecoderNamespace field is read below.
//   op      - 9-bit opcode forwarded unchanged to VOP1_Real_dpp.
//   opName  - base name; the pseudo is looked up as opName#"_e32".
//   asmName - mnemonic text prepended to the profile's AsmDPP16 string.
// In the updated ('+') form of the let, DecoderNamespace is also overridden:
// it is "DPP" # Gen.DecoderNamespace, with "_FAKE16" appended unless the
// pseudo's profile has IsRealTrue16 set.
multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
762
770
string asmName> {
763
771
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
764
- let AsmString = asmName # ps.Pfl.AsmDPP16 in {
772
+ let AsmString = asmName # ps.Pfl.AsmDPP16,
773
+ DecoderNamespace = "DPP" # Gen.DecoderNamespace #
774
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
765
775
defm NAME : VOP1_Real_dpp<Gen, op, opName>;
766
776
}
767
777
}
@@ -774,7 +784,9 @@ multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> {
774
784
// Instantiates VOP1_Real_dpp8<Gen, op, opName> under NAME with an overridden
// assembly string.
//   Gen     - GFXGen record; its DecoderNamespace field is read below.
//   op      - 9-bit opcode forwarded unchanged to VOP1_Real_dpp8.
//   opName  - base name; the pseudo is looked up as opName#"_e32".
//   asmName - mnemonic text prepended to the profile's AsmDPP8 string.
// In the updated ('+') form of the let, DecoderNamespace is also overridden:
// it is "DPP8" # Gen.DecoderNamespace, with "_FAKE16" appended unless the
// pseudo's profile has IsRealTrue16 set.
multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
775
785
string asmName> {
776
786
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
777
- let AsmString = asmName # ps.Pfl.AsmDPP8 in {
787
+ let AsmString = asmName # ps.Pfl.AsmDPP8,
788
+ DecoderNamespace = "DPP8" # Gen.DecoderNamespace #
789
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
778
790
defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
779
791
}
780
792
}
@@ -854,29 +866,30 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
854
866
"V_FFBH_I32", "v_cls_i32">;
855
867
// Real-encoding defm instantiations. Each VOP1_Real_FULL_t16_gfx11_gfx12 entry
// passes an opcode and the assembly mnemonic to use.
// In this diff most entries change only their record-name suffix from _t16 to
// _fake16; opcode and mnemonic are kept. V_CEIL_F16 is the exception: its _t16
// entry stays and a _fake16 entry is added with the same opcode (0x05c) and
// mnemonic.
// NOTE(review): the two V_CEIL_F16 entries presumably rely on the "_FAKE16"
// DecoderNamespace suffix to stay distinguishable when decoding - confirm.
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
856
868
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
857
- defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
858
- defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
859
- defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
869
+ defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
870
+ defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
871
+ defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
860
872
861
873
defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
862
874
defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
863
875
defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
864
876
defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
865
- defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
866
- defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
867
- defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
868
- defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
869
- defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
870
- defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
877
+ defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
878
+ defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
879
+ defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
880
+ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
881
+ defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
882
+ defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
871
883
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
872
- defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
884
+ defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
873
885
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
874
- defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
875
- defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
876
- defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
877
- defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
878
- defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
879
- defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
886
+ defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
887
+ defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
888
+ defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
889
+ defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
890
+ defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
891
+ defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
892
+ defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
880
893
defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
881
894
defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
882
895
0 commit comments